1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined(PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 #if defined(PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 33 for (i=0; i<B->mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = 
aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid--; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 67 68 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 70 #else 71 /* Make an array as long as the number of columns */ 72 /* mark those columns that are in baij->B */ 73 ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j]] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 83 ec = 0; 84 for (i=0; i<Nbs; i++) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i=0; i<ec; i++) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for (i=0; i<B->mbs; i++) { 97 for (j=0; j<B->ilen[i]; j++) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 103 104 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 105 ierr = PetscFree(indices);CHKERRQ(ierr); 106 #endif 107 108 /* create local vector that is used to scatter into */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 110 111 /* create two temporary index sets for building scatter-gather */ 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 113 114 ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr); 115 for (i=0; i<ec; i++) stmp[i] = i; 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 120 121 ierr = 
VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 122 123 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 127 128 baij->garray = garray; 129 130 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 131 ierr = ISDestroy(&from);CHKERRQ(ierr); 132 ierr = ISDestroy(&to);CHKERRQ(ierr); 133 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 /* 138 Takes the local part of an already assembled MPIBAIJ matrix 139 and disassembles it. This is to allow new nonzeros into the matrix 140 that require more communication in the matrix vector multiply. 141 Thus certain data-structures must be rebuilt. 142 143 Kind of slow! But that's what application programmers get when 144 they are sloppy. 
145 */ 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 148 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 149 { 150 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 151 Mat B = baij->B,Bnew; 152 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 153 PetscErrorCode ierr; 154 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 155 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 156 MatScalar *a = Bbaij->a; 157 MatScalar *atmp; 158 159 160 PetscFunctionBegin; 161 /* free stuff related to matrix-vec multiply */ 162 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 163 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 164 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 165 if (baij->colmap) { 166 #if defined(PETSC_USE_CTABLE) 167 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 168 #else 169 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 170 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 171 #endif 172 } 173 174 /* make sure that B is assembled so we can access its values */ 175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 178 /* invent new B and copy stuff over */ 179 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 180 for (i=0; i<mbs; i++) { 181 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 182 } 183 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 184 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 187 188 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 189 190 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 191 /* 192 Ensure that B's nonzerostate is monotonically increasing. 
193 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 194 */ 195 Bnew->nonzerostate = B->nonzerostate; 196 197 for (i=0; i<mbs; i++) { 198 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 199 col = garray[Bbaij->j[j]]; 200 atmp = a + j*bs2; 201 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 202 } 203 } 204 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 205 206 ierr = PetscFree(nz);CHKERRQ(ierr); 207 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 208 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 209 ierr = MatDestroy(&B);CHKERRQ(ierr); 210 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 211 212 baij->B = Bnew; 213 A->was_assembled = PETSC_FALSE; 214 A->assembled = PETSC_FALSE; 215 PetscFunctionReturn(0); 216 } 217 218 /* ugly stuff added for Glenn someday we should fix this up */ 219 220 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 221 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 222 223 224 #undef __FUNCT__ 225 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 226 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 227 { 228 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 229 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 230 PetscErrorCode ierr; 231 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 232 PetscInt *r_rmapd,*r_rmapo; 233 234 PetscFunctionBegin; 235 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 236 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 237 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 238 nt = 0; 239 for (i=0; i<inA->rmap->mapping->n; i++) { 240 if (inA->rmap->mapping->indices[i]*bs >= 
cstart && inA->rmap->mapping->indices[i]*bs < cend) { 241 nt++; 242 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 243 } 244 } 245 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 246 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 247 for (i=0; i<inA->rmap->mapping->n; i++) { 248 if (r_rmapd[i]) { 249 for (j=0; j<bs; j++) { 250 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 251 } 252 } 253 } 254 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 255 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 256 257 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 258 for (i=0; i<B->nbs; i++) { 259 lindices[garray[i]] = i+1; 260 } 261 no = inA->rmap->mapping->n - nt; 262 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 263 nt = 0; 264 for (i=0; i<inA->rmap->mapping->n; i++) { 265 if (lindices[inA->rmap->mapping->indices[i]]) { 266 nt++; 267 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 268 } 269 } 270 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 271 ierr = PetscFree(lindices);CHKERRQ(ierr); 272 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 273 for (i=0; i<inA->rmap->mapping->n; i++) { 274 if (r_rmapo[i]) { 275 for (j=0; j<bs; j++) { 276 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 277 } 278 } 279 } 280 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 281 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 287 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 288 { 289 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 290 PetscErrorCode ierr; 291 292 PetscFunctionBegin; 293 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 299 PetscErrorCode 
MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *mbaij = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       len,k;
  PetscScalar    *dd,*oo,*sv;

  PetscFunctionBegin;
  /* lazily build the diagonal/off-diagonal index maps and work vectors */
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&sv);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&len);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&dd);CHKERRQ(ierr);
  /* copy "diagonal" (true local) portion of scale into the dd work vector */
  for (k=0; k<len; k++) dd[k] = sv[uglyrmapd[k]];
  ierr = VecRestoreArray(uglydd,&dd);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(mbaij->A,NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&len);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&oo);CHKERRQ(ierr);
  /* copy "off-diagonal" portion of scale into the oo work vector */
  for (k=0; k<len; k++) oo[k] = sv[uglyrmapo[k]];
  ierr = VecRestoreArray(scale,&sv);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&oo);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(mbaij->B,NULL,uglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}