1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->n = ec*mat->bs; 68 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 69 /* Mark Adams */ 70 #else 71 /* Make an array as long as the number of columns */ 72 /* mark those columns that are in baij->B */ 73 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 74 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 75 for (i=0; i<B->mbs; i++) { 76 for (j=0; j<B->ilen[i]; j++) { 77 if (!indices[aj[B->i[i] + j]]) ec++; 78 indices[aj[B->i[i] + j]] = 1; 79 } 80 } 81 82 /* form array of columns we need */ 83 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 baij->B->n = ec*mat->bs; 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 109 110 /* create two temporary index sets for building scatter-gather */ 111 for (i=0; i<ec; i++) { 112 garray[i] = bs*garray[i]; 113 } 114 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 115 for (i=0; i<ec; i++) { 116 garray[i] = garray[i]/bs; 117 } 118 119 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 120 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 121 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 122 ierr = PetscFree(stmp);CHKERRQ(ierr); 123 124 /* create temporary global vector to generate scatter context */ 125 /* this is inefficient, but otherwise we must do either 126 1) save garray until the first actual scatter when the vector is known or 127 2) have another way of generating a scatter context without a vector.*/ 128 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 129 130 /* gnerate the scatter context */ 131 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 132 133 /* 134 Post the receives for the first matrix vector product. We sync-chronize after 135 this on the chance that the user immediately calls MatMult() after assemblying 136 the matrix. 137 */ 138 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 139 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 140 141 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 142 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 143 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 144 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 145 baij->garray = garray; 146 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 147 ierr = ISDestroy(from);CHKERRQ(ierr); 148 ierr = ISDestroy(to);CHKERRQ(ierr); 149 ierr = VecDestroy(gvec);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 /* 154 Takes the local part of an already assembled MPIBAIJ matrix 155 and disassembles it. This is to allow new nonzeros into the matrix 156 that require more communication in the matrix vector multiply. 157 Thus certain data-structures must be rebuilt. 158 159 Kind of slow! But that's what application programmers get when 160 they are sloppy. 161 */ 162 #undef __FUNCT__ 163 #define __FUNCT__ "DisAssemble_MPIBAIJ" 164 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 165 { 166 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 167 Mat B = baij->B,Bnew; 168 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 169 PetscErrorCode ierr; 170 PetscInt i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 171 PetscInt bs2 = baij->bs2,*nz,ec,m = A->m; 172 MatScalar *a = Bbaij->a; 173 PetscScalar *atmp; 174 #if defined(PETSC_USE_MAT_SINGLE) 175 PetscInt k; 176 #endif 177 178 PetscFunctionBegin; 179 /* free stuff related to matrix-vec multiply */ 180 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 181 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 182 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 183 if (baij->colmap) { 184 #if defined (PETSC_USE_CTABLE) 185 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 186 #else 187 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 188 baij->colmap = 0; 189 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 190 #endif 191 } 192 193 /* make sure that B is assembled so we can access its values */ 194 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 197 /* invent new B and copy stuff over */ 198 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 199 for (i=0; i<mbs; i++) { 200 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 201 } 202 ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr); 203 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 204 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 205 ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr); 206 ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr); 207 208 #if defined(PETSC_USE_MAT_SINGLE) 209 ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 210 #endif 211 for (i=0; i<mbs; i++) { 212 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 213 col = garray[Bbaij->j[j]]; 214 #if defined(PETSC_USE_MAT_SINGLE) 215 for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 216 #else 217 atmp = a + j*bs2; 218 #endif 219 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 220 } 221 } 222 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr); 223 224 #if defined(PETSC_USE_MAT_SINGLE) 225 ierr = PetscFree(atmp);CHKERRQ(ierr); 226 #endif 227 228 ierr = PetscFree(nz);CHKERRQ(ierr); 229 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 230 baij->garray = 0; 231 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 232 ierr = MatDestroy(B);CHKERRQ(ierr); 233 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 234 baij->B = Bnew; 235 A->was_assembled = PETSC_FALSE; 236 PetscFunctionReturn(0); 237 } 238 239 /* ugly stuff added for Glenn someday we should fix this up */ 240 241 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 242 parts of the local matrix */ 243 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 244 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 248 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 249 { 250 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 251 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 252 PetscErrorCode ierr; 253 PetscInt bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 254 PetscInt *r_rmapd,*r_rmapo; 255 256 PetscFunctionBegin; 257 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 258 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 259 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 260 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 261 nt = 0; 262 for (i=0; i<inA->bmapping->n; i++) { 263 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 264 nt++; 265 r_rmapd[i] = inA->bmapping->indices[i] + 1; 266 } 267 } 268 if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 269 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 270 for (i=0; i<inA->bmapping->n; i++) { 271 if (r_rmapd[i]){ 272 for (j=0; j<bs; j++) { 273 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 274 } 275 } 276 } 277 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 278 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 279 280 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 281 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 282 for (i=0; i<B->nbs; i++) { 283 lindices[garray[i]] = i+1; 284 } 285 no = inA->bmapping->n - nt; 286 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 287 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 288 nt = 0; 289 for (i=0; i<inA->bmapping->n; i++) { 290 if (lindices[inA->bmapping->indices[i]]) { 291 nt++; 292 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 293 } 294 } 295 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 296 ierr = PetscFree(lindices);CHKERRQ(ierr); 297 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 298 for (i=0; i<inA->bmapping->n; i++) { 299 if (r_rmapo[i]){ 300 for (j=0; j<bs; j++) { 301 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 302 } 303 } 304 } 305 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 306 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 307 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 313 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 314 { 315 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 316 PetscErrorCode ierr,(*f)(Mat,Vec); 317 318 PetscFunctionBegin; 319 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 320 if (f) { 321 ierr = (*f)(A,scale);CHKERRQ(ierr); 322 } 323 PetscFunctionReturn(0); 324 } 325 326 EXTERN_C_BEGIN 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 329 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 330 { 331 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 332 PetscErrorCode ierr; 333 PetscInt n,i; 334 PetscScalar *d,*o,*s; 335 336 PetscFunctionBegin; 337 if (!uglyrmapd) { 338 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 339 } 340 341 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 342 343 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 344 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 345 for (i=0; i<n; i++) { 346 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 347 } 348 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 349 /* column scale "diagonal" portion of local matrix */ 350 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 351 352 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 353 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 354 for (i=0; i<n; i++) { 355 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 356 } 357 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 358 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 359 /* column scale "off-diagonal" portion of local matrix */ 360 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 361 362 PetscFunctionReturn(0); 363 } 364 EXTERN_C_END 365 366 367