#define PETSCMAT_DLL

/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include "src/mat/impls/baij/mpi/mpibaij.h"

EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for
   the parallel matrix-vector product of an MPIBAIJ matrix.

   Specifically it:
     1) determines the set of nonzero global block columns of the off-diagonal
        part baij->B and records them (sorted, in the non-CTABLE branch; via a
        hash table in the CTABLE branch) in baij->garray;
     2) compacts the column indices of B in place so they index into garray
        (local numbering 0..ec-1) and shrinks B's column map to ec blocks;
     3) creates the sequential work vector baij->lvec (ec*bs point entries)
        and the scatter context baij->Mvctx that gathers the needed remote
        vector entries into lvec.

   Input Parameter:
.  mat - the MPIBAIJ matrix (its off-diagonal part baij->B must already be
         populated; B->ilen/B->i/B->j are read and B->j is rewritten)

   Level: developer
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec counts distinct nonzero block columns of B */
  PetscInt       bs = mat->rmap.bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;                   /* maps (global block col + 1) -> (local id + 1); one-based so 0 means "absent" */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;              /* dense marker array over all Nbs global block columns */
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  /* first pass: assign a local id (insertion order) to each distinct global block column */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                       /* undo the one-based offsets */
    garray[lid] = gid;
  }
  /* sort garray and rebuild the table so local ids follow ascending global order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;              /* B->j now holds local (compacted) block columns */
    }
  }
  B->nbs = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning indices[] in order makes garray sorted */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  baij->B->cmap.n =baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* temporarily scale garray from block to point indices for ISCreateBlock */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;           /* restore block indices */
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;                /* ownership of garray passes to the matrix; freed in DisAssemble/destroy */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
154 */ 155 #undef __FUNCT__ 156 #define __FUNCT__ "DisAssemble_MPIBAIJ" 157 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 158 { 159 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 160 Mat B = baij->B,Bnew; 161 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray; 164 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap.n; 165 MatScalar *a = Bbaij->a; 166 PetscScalar *atmp; 167 #if defined(PETSC_USE_MAT_SINGLE) 168 PetscInt k; 169 #endif 170 171 PetscFunctionBegin; 172 /* free stuff related to matrix-vec multiply */ 173 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 174 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 175 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 176 if (baij->colmap) { 177 #if defined (PETSC_USE_CTABLE) 178 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 179 #else 180 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 181 baij->colmap = 0; 182 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 183 #endif 184 } 185 186 /* make sure that B is assembled so we can access its values */ 187 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 188 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 189 190 /* invent new B and copy stuff over */ 191 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 192 for (i=0; i<mbs; i++) { 193 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 194 } 195 ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr); 196 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 197 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 198 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr); 199 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 200 201 #if defined(PETSC_USE_MAT_SINGLE) 202 ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 203 #endif 204 for (i=0; i<mbs; i++) { 205 for 
(j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 206 col = garray[Bbaij->j[j]]; 207 #if defined(PETSC_USE_MAT_SINGLE) 208 for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 209 #else 210 atmp = a + j*bs2; 211 #endif 212 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 213 } 214 } 215 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 216 217 #if defined(PETSC_USE_MAT_SINGLE) 218 ierr = PetscFree(atmp);CHKERRQ(ierr); 219 #endif 220 221 ierr = PetscFree(nz);CHKERRQ(ierr); 222 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 223 baij->garray = 0; 224 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 225 ierr = MatDestroy(B);CHKERRQ(ierr); 226 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 227 baij->B = Bnew; 228 A->was_assembled = PETSC_FALSE; 229 PetscFunctionReturn(0); 230 } 231 232 /* ugly stuff added for Glenn someday we should fix this up */ 233 234 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 235 parts of the local matrix */ 236 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 241 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 242 { 243 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 244 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 245 PetscErrorCode ierr; 246 PetscInt bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 247 PetscInt *r_rmapd,*r_rmapo; 248 249 PetscFunctionBegin; 250 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 251 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 252 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 253 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 254 nt = 0; 255 for (i=0; i<inA->bmapping->n; i++) { 256 
if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 257 nt++; 258 r_rmapd[i] = inA->bmapping->indices[i] + 1; 259 } 260 } 261 if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 262 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 263 for (i=0; i<inA->bmapping->n; i++) { 264 if (r_rmapd[i]){ 265 for (j=0; j<bs; j++) { 266 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 267 } 268 } 269 } 270 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 271 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 272 273 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 274 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 275 for (i=0; i<B->nbs; i++) { 276 lindices[garray[i]] = i+1; 277 } 278 no = inA->bmapping->n - nt; 279 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 280 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 281 nt = 0; 282 for (i=0; i<inA->bmapping->n; i++) { 283 if (lindices[inA->bmapping->indices[i]]) { 284 nt++; 285 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 286 } 287 } 288 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 289 ierr = PetscFree(lindices);CHKERRQ(ierr); 290 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 291 for (i=0; i<inA->bmapping->n; i++) { 292 if (r_rmapo[i]){ 293 for (j=0; j<bs; j++) { 294 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 295 } 296 } 297 } 298 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 299 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 300 301 PetscFunctionReturn(0); 302 } 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 306 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 307 { 308 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 309 PetscErrorCode ierr,(*f)(Mat,Vec); 310 311 PetscFunctionBegin; 312 
ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 313 if (f) { 314 ierr = (*f)(A,scale);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 EXTERN_C_BEGIN 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 322 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 323 { 324 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 325 PetscErrorCode ierr; 326 PetscInt n,i; 327 PetscScalar *d,*o,*s; 328 329 PetscFunctionBegin; 330 if (!uglyrmapd) { 331 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 332 } 333 334 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 335 336 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 337 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 338 for (i=0; i<n; i++) { 339 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 340 } 341 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 342 /* column scale "diagonal" portion of local matrix */ 343 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 344 345 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 346 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 347 for (i=0; i<n; i++) { 348 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 349 } 350 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 351 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 352 /* column scale "off-diagonal" portion of local matrix */ 353 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 354 355 PetscFunctionReturn(0); 356 } 357 EXTERN_C_END 358 359 360