
/*
   Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */

/*
   Builds the data structures needed for the parallel matrix-vector product:
     - aij->garray : sorted map from local column index of the off-diagonal part B
                     to the global column index
     - aij->lvec   : sequential work vector that receives the needed off-process entries
     - aij->Mvctx (or Mvctx_mpi1) : scatter context filling lvec from a global vector

   As a side effect the column indices of B are compacted: on entry B->j holds
   global column indices, on exit they are renumbered 0..ec-1 (ec = number of
   distinct off-process columns) and B's column layout is reset to size ec.
*/
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;  /* ec counts distinct off-process columns */
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;  /* 1-based global col -> 1-based local col (0 means "absent") */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--;  /* undo the 1-based shift used inside the table */
      lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    /* rebuild the table so local ids follow the sorted (ascending global) order */
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;  /* global -> compacted local column index */
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;  /* garray comes out sorted automatically */
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    /* garray already built by an earlier call; reuse it */
    garray = aij->garray;
  }

  if (!aij->lvec) {
    /* create local vector that is used to scatter into */
    ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
  } else {
    /* lvec exists from an earlier setup; recover ec from its length */
    ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);
  }

  /* create two temporary Index sets for build scatter gather */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  if (aij->Mvctx_mpi1_flg) {
    ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr);
  } else {
    ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  }
  aij->garray = garray;  /* matrix takes ownership of garray */

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* the replacement B has full global column width n = A->cmap->N, so its
     column indices are global and no garray/scatter is needed until reassembly */
  ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];  /* exact per-row preallocation from old B */
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  ierr = PetscFree(nz);CHKERRQ(ierr);
  /* copy entries into Bnew, translating compacted local columns back to global
     via garray; ct walks the old CSR value/column arrays linearly */
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;  /* forces the multiply structures to be rebuilt */
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec auglydd = 0,auglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */


/*
   One-time setup for MatDiagonalScaleLocal_MPIAIJ(): builds auglyrmapd/auglyrmapo
   (index maps from the columns of the "diagonal" part A and "off-diagonal" part B
   into the local ordering given by inA->rmap->mapping) and the work vectors
   auglydd/auglyoo.  NOTE(review): stored in file-static globals, so this supports
   only one matrix at a time — see the "ugly stuff" comment above.
*/
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  /* r_rmapd[i] = (global index of local entry i) + 1 when it is owned locally, else 0 */
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr);
  /* invert: auglyrmapd[local column of A] = position in the local ordering */
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr =
PetscFree(r_rmapd);CHKERRQ(ierr); 252 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 253 254 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 255 for (i=0; i<ina->B->cmap->n; i++) { 256 lindices[garray[i]] = i+1; 257 } 258 no = inA->rmap->mapping->n - nt; 259 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 260 nt = 0; 261 for (i=0; i<inA->rmap->mapping->n; i++) { 262 if (lindices[inA->rmap->mapping->indices[i]]) { 263 nt++; 264 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 265 } 266 } 267 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 268 ierr = PetscFree(lindices);CHKERRQ(ierr); 269 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 270 for (i=0; i<inA->rmap->mapping->n; i++) { 271 if (r_rmapo[i]) { 272 auglyrmapo[(r_rmapo[i]-1)] = i; 273 } 274 } 275 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 276 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 281 { 282 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 291 { 292 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 293 PetscErrorCode ierr; 294 PetscInt n,i; 295 PetscScalar *d,*o; 296 const PetscScalar *s; 297 298 PetscFunctionBegin; 299 if (!auglyrmapd) { 300 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 301 } 302 303 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 304 305 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 306 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 307 for (i=0; i<n; i++) { 308 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 309 } 
310 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 311 /* column scale "diagonal" portion of local matrix */ 312 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 313 314 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 315 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 316 for (i=0; i<n; i++) { 317 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 318 } 319 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 320 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 321 /* column scale "off-diagonal" portion of local matrix */ 322 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 323 PetscFunctionReturn(0); 324 } 325