1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/vecimpl.h> 7 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 8 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscErrorCode ierr; 14 PetscInt i,j,*aj = B->j,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt N = mat->cmap->N,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 if (!aij->garray) { 27 #if defined(PETSC_USE_CTABLE) 28 /* use a table */ 29 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->rmap->n; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->rmap->n; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid--; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 aij->B->cmap->n = aij->B->cmap->N = ec; 64 aij->B->cmap->bs = 1; 65 66 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in aij->B */ 71 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 72 for (i=0; i<aij->B->rmap->n; i++) { 73 for (j=0; j<B->ilen[i]; j++) { 74 if (!indices[aj[B->i[i] + j]]) ec++; 75 indices[aj[B->i[i] + j]] = 1; 76 } 77 } 78 79 /* form array of columns we need */ 80 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 81 ec = 0; 82 for (i=0; i<N; i++) { 83 if (indices[i]) garray[ec++] = i; 84 } 85 86 /* make indices now point into garray */ 87 for (i=0; i<ec; i++) { 88 indices[garray[i]] = i; 89 } 90 91 /* compact out the extra columns in B */ 92 for (i=0; i<aij->B->rmap->n; i++) { 93 for (j=0; j<B->ilen[i]; j++) { 94 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 } 97 aij->B->cmap->n = aij->B->cmap->N = ec; 98 aij->B->cmap->bs = 1; 99 100 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 } else { 104 garray = aij->garray; 105 } 106 107 if (!aij->lvec) { 108 /* create local vector that is used to scatter into */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 110 } else { 111 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 112 } 113 114 /* create two temporary Index sets for build scatter gather */ 115 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 116 117 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 /* This does not allocate the array's memory so is efficient */ 121 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 122 123 /* generate the scatter context */ 124 if (aij->Mvctx_mpi1_flg) { 125 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 126 ierr = VecScatterCreate_MPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 128 } else { 129 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 130 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 133 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 134 } 135 aij->garray = garray; 136 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 139 140 ierr = ISDestroy(&from);CHKERRQ(ierr); 141 ierr = ISDestroy(&to);CHKERRQ(ierr); 142 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 /* 147 Takes the local part of an already assembled MPIAIJ matrix 148 and disassembles it. This is to allow new nonzeros into the matrix 149 that require more communication in the matrix vector multiply. 150 Thus certain data-structures must be rebuilt. 151 152 Kind of slow! But that's what application programmers get when 153 they are sloppy. 154 */ 155 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 156 { 157 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 158 Mat B = aij->B,Bnew; 159 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 160 PetscErrorCode ierr; 161 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 162 PetscScalar v; 163 164 PetscFunctionBegin; 165 /* free stuff related to matrix-vec multiply */ 166 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 167 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 168 if (aij->colmap) { 169 #if defined(PETSC_USE_CTABLE) 170 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 171 #else 172 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 173 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 174 #endif 175 } 176 177 /* make sure that B is assembled so we can access its values */ 178 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 181 /* invent new B and copy stuff over */ 182 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 183 for (i=0; i<m; i++) { 184 nz[i] = Baij->i[i+1] - Baij->i[i]; 185 } 186 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 187 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 188 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 189 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 190 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 191 192 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 193 /* 194 Ensure that B's nonzerostate is monotonically increasing. 195 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 196 */ 197 Bnew->nonzerostate = B->nonzerostate; 198 199 ierr = PetscFree(nz);CHKERRQ(ierr); 200 for (i=0; i<m; i++) { 201 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 202 col = garray[Baij->j[ct]]; 203 v = Baij->a[ct++]; 204 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 205 } 206 } 207 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 208 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 209 ierr = MatDestroy(&B);CHKERRQ(ierr); 210 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 211 212 aij->B = Bnew; 213 A->was_assembled = PETSC_FALSE; 214 PetscFunctionReturn(0); 215 } 216 217 /* ugly stuff added for Glenn someday we should fix this up */ 218 219 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 220 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 221 222 223 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 224 { 225 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 226 PetscErrorCode ierr; 227 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 228 PetscInt *r_rmapd,*r_rmapo; 229 230 PetscFunctionBegin; 231 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 232 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 233 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 234 nt = 0; 235 for (i=0; i<inA->rmap->mapping->n; i++) { 236 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 237 nt++; 238 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 239 } 240 } 241 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 242 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 243 for (i=0; i<inA->rmap->mapping->n; i++) { 244 if (r_rmapd[i]) { 245 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 246 } 247 } 248 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 249 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 250 251 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 252 for (i=0; i<ina->B->cmap->n; i++) { 253 lindices[garray[i]] = i+1; 254 } 255 no = inA->rmap->mapping->n - nt; 256 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 257 nt = 0; 258 for (i=0; i<inA->rmap->mapping->n; i++) { 259 if (lindices[inA->rmap->mapping->indices[i]]) { 260 nt++; 261 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 262 } 263 } 264 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 265 ierr = PetscFree(lindices);CHKERRQ(ierr); 266 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 267 for (i=0; i<inA->rmap->mapping->n; i++) { 268 if (r_rmapo[i]) { 269 auglyrmapo[(r_rmapo[i]-1)] = i; 270 } 271 } 272 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 278 { 279 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 288 { 289 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 290 PetscErrorCode ierr; 291 PetscInt n,i; 292 PetscScalar *d,*o; 293 const PetscScalar *s; 294 295 PetscFunctionBegin; 296 if (!auglyrmapd) { 297 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 298 } 299 300 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 301 302 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 303 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 304 for (i=0; i<n; i++) { 305 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 306 } 307 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 308 /* column scale "diagonal" portion of local matrix */ 309 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 310 311 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 312 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 313 for (i=0; i<n; i++) { 314 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 315 } 316 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 317 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 318 /* column scale "off-diagonal" portion of local matrix */ 319 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322