1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel AIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/aij/mpi/mpiaij.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 10 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 11 { 12 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 13 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 14 PetscErrorCode ierr; 15 PetscInt i,j,*aj = B->j,ec = 0,*garray; 16 IS from,to; 17 Vec gvec; 18 PetscTruth useblockis; 19 #if defined (PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; 21 PetscTablePosition tpos; 22 PetscInt gid,lid; 23 #else 24 PetscInt N = mat->cmap->N,*indices; 25 #endif 26 27 PetscFunctionBegin; 28 29 #if defined (PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 ierr = PetscTableCreate(aij->B->rmap->n,&gid1_lid1);CHKERRQ(ierr); 32 for (i=0; i<aij->B->rmap->n; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; 48 lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<aij->B->rmap->n; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i] + j] = lid; 63 } 64 } 65 aij->B->cmap->n = aij->B->cmap->N = ec; 66 ierr = PetscMapSetUp((aij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 68 /* Mark Adams */ 69 #else 70 /* Make an array as long as the number of columns */ 71 /* mark those columns that are in aij->B */ 72 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr); 74 for (i=0; i<aij->B->rmap->n; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j] ]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 83 ec = 0; 84 for (i=0; i<N; i++) { 85 if (indices[i]) garray[ec++] = i; 86 } 87 88 /* make indices now point into garray */ 89 for (i=0; i<ec; i++) { 90 indices[garray[i]] = i; 91 } 92 93 /* compact out the extra columns in B */ 94 for (i=0; i<aij->B->rmap->n; i++) { 95 for (j=0; j<B->ilen[i]; j++) { 96 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 97 } 98 } 99 aij->B->cmap->n = aij->B->cmap->N = ec; 100 ierr = PetscMapSetUp((aij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 /* create local vector that is used to scatter into */ 104 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 105 106 /* create two temporary Index sets for build scatter gather */ 107 /* check for the special case where blocks are communicated for faster VecScatterXXX */ 108 useblockis = PETSC_TRUE; 109 if (mat->rmap->bs > 1) { 110 PetscInt bs = mat->rmap->bs,ibs,ga; 111 if (!(ec % bs)) { 112 for (i=0; i<ec/bs; i++) { 113 if ((ga = garray[ibs = i*bs]) % bs) { 114 useblockis = PETSC_FALSE; 115 break; 116 } 117 for (j=1; j<bs; j++) { 118 if (garray[ibs+j] != ga+j) { 119 useblockis = PETSC_FALSE; 120 break; 121 } 122 } 123 if (!useblockis) break; 124 } 125 } 126 } 127 if (useblockis) { 128 PetscInt *ga,bs = mat->rmap->bs,iec = ec/bs; 129 ierr = PetscInfo(mat,"Using block index set to define scatter\n"); 130 ierr = PetscMalloc((ec/mat->rmap->bs)*sizeof(PetscInt),&ga);CHKERRQ(ierr); 131 for (i=0; i<iec; i++) ga[i] = garray[i*bs]; 132 ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,&from);CHKERRQ(ierr); 133 ierr = PetscFree(ga);CHKERRQ(ierr); 134 } else { 135 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,&from);CHKERRQ(ierr); 136 } 137 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 138 139 /* create temporary global vector to generate scatter context */ 140 /* This does not allocate the array's memory so is efficient */ 141 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 142 143 /* generate the scatter context */ 144 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 145 ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr); 146 ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr); 147 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 148 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 149 aij->garray = garray; 150 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 151 ierr = ISDestroy(from);CHKERRQ(ierr); 152 ierr = ISDestroy(to);CHKERRQ(ierr); 153 ierr = VecDestroy(gvec);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "DisAssemble_MPIAIJ" 160 /* 161 Takes the local part of an already assembled MPIAIJ matrix 162 and disassembles it. This is to allow new nonzeros into the matrix 163 that require more communication in the matrix vector multiply. 164 Thus certain data-structures must be rebuilt. 165 166 Kind of slow! But that's what application programmers get when 167 they are sloppy. 168 */ 169 PetscErrorCode DisAssemble_MPIAIJ(Mat A) 170 { 171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 172 Mat B = aij->B,Bnew; 173 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 174 PetscErrorCode ierr; 175 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 176 PetscScalar v; 177 178 PetscFunctionBegin; 179 /* free stuff related to matrix-vec multiply */ 180 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 181 ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0; 182 ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0; 183 if (aij->colmap) { 184 #if defined (PETSC_USE_CTABLE) 185 ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr); 186 aij->colmap = 0; 187 #else 188 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 189 aij->colmap = 0; 190 ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 191 #endif 192 } 193 194 /* make sure that B is assembled so we can access its values */ 195 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197 198 /* invent new B and copy stuff over */ 199 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr); 200 for (i=0; i<m; i++) { 201 nz[i] = Baij->i[i+1] - Baij->i[i]; 202 } 203 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 204 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 205 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 206 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 207 ierr = PetscFree(nz);CHKERRQ(ierr); 208 for (i=0; i<m; i++) { 209 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 210 col = garray[Baij->j[ct]]; 211 v = Baij->a[ct++]; 212 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 213 } 214 } 215 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 216 aij->garray = 0; 217 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 218 ierr = MatDestroy(B);CHKERRQ(ierr); 219 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 220 aij->B = Bnew; 221 A->was_assembled = PETSC_FALSE; 222 PetscFunctionReturn(0); 223 } 224 225 /* ugly stuff added for Glenn someday we should fix this up */ 226 227 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 228 parts of the local matrix */ 229 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 230 231 232 #undef __FUNCT__ 233 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 234 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 235 { 236 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 237 PetscErrorCode ierr; 238 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 239 PetscInt *r_rmapd,*r_rmapo; 240 241 PetscFunctionBegin; 242 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 243 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 244 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 245 ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 246 nt = 0; 247 for (i=0; i<inA->mapping->n; i++) { 248 if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) { 249 nt++; 250 r_rmapd[i] = inA->mapping->indices[i] + 1; 251 } 252 } 253 if (nt != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 254 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr); 255 for (i=0; i<inA->mapping->n; i++) { 256 if (r_rmapd[i]){ 257 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 258 } 259 } 260 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 261 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 262 263 ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 264 ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 265 for (i=0; i<ina->B->cmap->n; i++) { 266 lindices[garray[i]] = i+1; 267 } 268 no = inA->mapping->n - nt; 269 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 270 ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 271 nt = 0; 272 for (i=0; i<inA->mapping->n; i++) { 273 if (lindices[inA->mapping->indices[i]]) { 274 nt++; 275 r_rmapo[i] = lindices[inA->mapping->indices[i]]; 276 } 277 } 278 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 279 ierr = PetscFree(lindices);CHKERRQ(ierr); 280 ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr); 281 for (i=0; i<inA->mapping->n; i++) { 282 if (r_rmapo[i]){ 283 auglyrmapo[(r_rmapo[i]-1)] = i; 284 } 285 } 286 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 287 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 288 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 294 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 295 { 296 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 297 PetscErrorCode ierr,(*f)(Mat,Vec); 298 299 PetscFunctionBegin; 300 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 301 if (f) { 302 ierr = (*f)(A,scale);CHKERRQ(ierr); 303 } 304 PetscFunctionReturn(0); 305 } 306 307 EXTERN_C_BEGIN 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 310 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 311 { 312 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 313 PetscErrorCode ierr; 314 PetscInt n,i; 315 PetscScalar *d,*o,*s; 316 317 PetscFunctionBegin; 318 if (!auglyrmapd) { 319 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 320 } 321 322 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 323 324 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 325 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 326 for (i=0; i<n; i++) { 327 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 328 } 329 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 330 /* column scale "diagonal" portion of local matrix */ 331 ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 332 333 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 334 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 335 for (i=0; i<n; i++) { 336 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 337 } 338 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 339 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 340 /* column scale "off-diagonal" portion of local matrix */ 341 ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 342 343 PetscFunctionReturn(0); 344 } 345 EXTERN_C_END 346 347 348