/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc-private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */

extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication support for the parallel
   block matrix-vector product.

   It determines which global block columns are actually referenced by the
   off-diagonal part B (there are ec of them), records them in baij->garray
   (sorted list of needed global block-column indices), renumbers B's column
   indices to the compact local numbering 0..ec-1, creates the local work
   vector baij->lvec of length ec*bs, and builds the scatter baij->Mvctx that
   gathers those ec*bs entries of a global vector into lvec.

   Two implementations of the "find the needed columns" step are kept:
   a hash table (PETSC_USE_CTABLE, memory proportional to ec) and a dense
   marker array of length Nbs (memory proportional to the global block size).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->rmap->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;   /* maps (global block col + 1) -> (local id + 1); zero means "absent" */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;   /* shift by one: table cannot store key/value 0 */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* undo the one-based shift */
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so it maps global block col -> sorted position */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: global block-column indices become 0..ec-1 */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need (sorted automatically, since we sweep 0..Nbs-1) */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B: global block-column indices become 0..ec-1 */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     "from" = the ec needed global blocks, "to" = blocks 0..ec-1 of lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context
     (no array needed: only the layout matters to VecScatterCreate) */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  baij->garray = garray;   /* ownership of garray passes to the matrix */

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
145 */ 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 148 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 149 { 150 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 151 Mat B = baij->B,Bnew; 152 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 153 PetscErrorCode ierr; 154 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 155 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 156 MatScalar *a = Bbaij->a; 157 MatScalar *atmp; 158 159 160 PetscFunctionBegin; 161 /* free stuff related to matrix-vec multiply */ 162 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 163 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 164 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 165 if (baij->colmap) { 166 #if defined(PETSC_USE_CTABLE) 167 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 168 #else 169 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 170 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 171 #endif 172 } 173 174 /* make sure that B is assembled so we can access its values */ 175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 178 /* invent new B and copy stuff over */ 179 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 180 for (i=0; i<mbs; i++) { 181 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 182 } 183 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 184 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 187 188 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. 
*/ 189 190 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 191 192 for (i=0; i<mbs; i++) { 193 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 194 col = garray[Bbaij->j[j]]; 195 atmp = a + j*bs2; 196 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 197 } 198 } 199 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 200 201 ierr = PetscFree(nz);CHKERRQ(ierr); 202 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 203 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 204 ierr = MatDestroy(&B);CHKERRQ(ierr); 205 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 206 207 baij->B = Bnew; 208 A->was_assembled = PETSC_FALSE; 209 A->assembled = PETSC_FALSE; 210 PetscFunctionReturn(0); 211 } 212 213 /* ugly stuff added for Glenn someday we should fix this up */ 214 215 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 216 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 217 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 221 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 222 { 223 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 224 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 225 PetscErrorCode ierr; 226 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 227 PetscInt *r_rmapd,*r_rmapo; 228 229 PetscFunctionBegin; 230 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 231 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 232 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 233 nt = 0; 234 for (i=0; i<inA->rmap->mapping->n; i++) { 235 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 236 nt++; 237 r_rmapd[i] = 
inA->rmap->mapping->indices[i] + 1; 238 } 239 } 240 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 241 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 242 for (i=0; i<inA->rmap->mapping->n; i++) { 243 if (r_rmapd[i]) { 244 for (j=0; j<bs; j++) { 245 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 246 } 247 } 248 } 249 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 250 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 251 252 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 253 for (i=0; i<B->nbs; i++) { 254 lindices[garray[i]] = i+1; 255 } 256 no = inA->rmap->mapping->n - nt; 257 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 258 nt = 0; 259 for (i=0; i<inA->rmap->mapping->n; i++) { 260 if (lindices[inA->rmap->mapping->indices[i]]) { 261 nt++; 262 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 263 } 264 } 265 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 266 ierr = PetscFree(lindices);CHKERRQ(ierr); 267 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 268 for (i=0; i<inA->rmap->mapping->n; i++) { 269 if (r_rmapo[i]) { 270 for (j=0; j<bs; j++) { 271 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 272 } 273 } 274 } 275 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 276 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 282 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 283 { 284 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 294 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 295 { 296 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 297 PetscErrorCode ierr; 298 PetscInt n,i; 299 PetscScalar *d,*o,*s; 300 301 PetscFunctionBegin; 302 if (!uglyrmapd) { 303 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 304 } 305 306 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 307 308 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 309 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 310 for (i=0; i<n; i++) { 311 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 312 } 313 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 314 /* column scale "diagonal" portion of local matrix */ 315 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 316 317 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 318 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 319 for (i=0; i<n; i++) { 320 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 321 } 322 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 323 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 324 /* column scale "off-diagonal" portion of local matrix */ 325 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 330