1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 32 for (i=0; i<B->mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<B->mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 PetscInt gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid --; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 66 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 72 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 73 for (i=0; i<B->mbs; i++) { 74 for (j=0; j<B->ilen[i]; j++) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j]] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 82 ec = 0; 83 for (i=0; i<Nbs; i++) { 84 if (indices[i]) { 85 garray[ec++] = i; 86 } 87 } 88 89 /* make indices now point into garray */ 90 for (i=0; i<ec; i++) { 91 indices[garray[i]] = i; 92 } 93 94 /* compact out the extra columns in B */ 95 for (i=0; i<B->mbs; i++) { 96 for (j=0; j<B->ilen[i]; j++) { 97 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 98 } 99 } 100 B->nbs = ec; 101 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 102 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 103 ierr = PetscFree(indices);CHKERRQ(ierr); 104 #endif 105 106 /* create local vector that is used to scatter into */ 107 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 108 109 /* create two temporary index sets for building scatter-gather */ 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 111 112 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 113 for (i=0; i<ec; i++) { stmp[i] = i; } 114 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 115 116 /* create temporary global vector to generate scatter context */ 117 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 118 119 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 120 121 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 122 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 125 baij->garray = garray; 126 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 127 ierr = ISDestroy(&from);CHKERRQ(ierr); 128 ierr = ISDestroy(&to);CHKERRQ(ierr); 129 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* 134 Takes the local part of an already assembled MPIBAIJ matrix 135 and disassembles it. This is to allow new nonzeros into the matrix 136 that require more communication in the matrix vector multiply. 137 Thus certain data-structures must be rebuilt. 138 139 Kind of slow! But that's what application programmers get when 140 they are sloppy. 141 */ 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 144 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 145 { 146 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 147 Mat B = baij->B,Bnew; 148 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 149 PetscErrorCode ierr; 150 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 151 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 152 MatScalar *a = Bbaij->a; 153 MatScalar *atmp; 154 155 156 PetscFunctionBegin; 157 /* free stuff related to matrix-vec multiply */ 158 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 159 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 160 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 161 if (baij->colmap) { 162 #if defined (PETSC_USE_CTABLE) 163 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 164 #else 165 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 166 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 167 #endif 168 } 169 170 /* make sure that B is assembled so we can access its values */ 171 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 174 /* invent new B and copy stuff over */ 175 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 176 for (i=0; i<mbs; i++) { 177 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 178 } 179 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 180 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 181 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 182 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 183 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 184 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 186 for (i=0; i<mbs; i++) { 187 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 188 col = garray[Bbaij->j[j]]; 189 atmp = a + j*bs2; 190 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 191 } 192 } 193 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 194 195 ierr = PetscFree(nz);CHKERRQ(ierr); 196 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 197 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 198 ierr = MatDestroy(&B);CHKERRQ(ierr); 199 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 200 baij->B = Bnew; 201 A->was_assembled = PETSC_FALSE; 202 A->assembled = PETSC_FALSE; 203 PetscFunctionReturn(0); 204 } 205 206 /* ugly stuff added for Glenn someday we should fix this up */ 207 208 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 209 parts of the local matrix */ 210 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 211 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 216 { 217 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 218 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 219 PetscErrorCode ierr; 220 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 221 PetscInt *r_rmapd,*r_rmapo; 222 223 PetscFunctionBegin; 224 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 225 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 226 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 227 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 228 nt = 0; 229 for (i=0; i<inA->rmap->bmapping->n; i++) { 230 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 231 nt++; 232 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 233 } 234 } 235 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 236 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 237 for (i=0; i<inA->rmap->bmapping->n; i++) { 238 if (r_rmapd[i]) { 239 for (j=0; j<bs; j++) { 240 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 241 } 242 } 243 } 244 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 245 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 246 247 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 248 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 249 for (i=0; i<B->nbs; i++) { 250 lindices[garray[i]] = i+1; 251 } 252 no = inA->rmap->bmapping->n - nt; 253 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 254 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->rmap->bmapping->n; i++) { 257 if (lindices[inA->rmap->bmapping->indices[i]]) { 258 nt++; 259 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 260 } 261 } 262 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 263 ierr = PetscFree(lindices);CHKERRQ(ierr); 264 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 265 for (i=0; i<inA->rmap->bmapping->n; i++) { 266 if (r_rmapo[i]) { 267 for (j=0; j<bs; j++) { 268 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 269 } 270 } 271 } 272 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 279 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 280 { 281 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 EXTERN_C_BEGIN 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 292 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 293 { 294 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 295 PetscErrorCode ierr; 296 PetscInt n,i; 297 PetscScalar *d,*o,*s; 298 299 PetscFunctionBegin; 300 if (!uglyrmapd) { 301 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 302 } 303 304 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 305 306 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 307 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 308 for (i=0; i<n; i++) { 309 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 310 } 311 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 312 /* column scale "diagonal" portion of local matrix */ 313 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 314 315 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 316 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 317 for (i=0; i<n; i++) { 318 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 319 } 320 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 321 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 322 /* column scale "off-diagonal" portion of local matrix */ 323 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 EXTERN_C_END 327 328 329