1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/baij/mpi/mpibaij.h" 7 8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 68 ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 105 ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 for (i=0; i<ec; i++) { 114 garray[i] = bs*garray[i]; 115 } 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 117 for (i=0; i<ec; i++) { 118 garray[i] = garray[i]/bs; 119 } 120 121 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 122 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 123 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 124 ierr = PetscFree(stmp);CHKERRQ(ierr); 125 126 /* create temporary global vector to generate scatter context */ 127 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 128 129 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 130 131 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 135 baij->garray = garray; 136 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 137 ierr = ISDestroy(from);CHKERRQ(ierr); 138 ierr = ISDestroy(to);CHKERRQ(ierr); 139 ierr = VecDestroy(gvec);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* 144 Takes the local part of an already assembled MPIBAIJ matrix 145 and disassembles it. This is to allow new nonzeros into the matrix 146 that require more communication in the matrix vector multiply. 147 Thus certain data-structures must be rebuilt. 148 149 Kind of slow! But that's what application programmers get when 150 they are sloppy. 151 */ 152 #undef __FUNCT__ 153 #define __FUNCT__ "DisAssemble_MPIBAIJ" 154 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 155 { 156 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 157 Mat B = baij->B,Bnew; 158 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 159 PetscErrorCode ierr; 160 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 161 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 162 MatScalar *a = Bbaij->a; 163 MatScalar *atmp; 164 165 166 PetscFunctionBegin; 167 /* free stuff related to matrix-vec multiply */ 168 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 169 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 170 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 171 if (baij->colmap) { 172 #if defined (PETSC_USE_CTABLE) 173 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 174 #else 175 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 176 baij->colmap = 0; 177 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 178 #endif 179 } 180 181 /* make sure that B is assembled so we can access its values */ 182 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 184 185 /* invent new B and copy stuff over */ 186 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 187 for (i=0; i<mbs; i++) { 188 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 189 } 190 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 191 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 192 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 193 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 194 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 195 196 for (i=0; i<mbs; i++) { 197 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 198 col = garray[Bbaij->j[j]]; 199 atmp = a + j*bs2; 200 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 201 } 202 } 203 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 204 205 ierr = PetscFree(nz);CHKERRQ(ierr); 206 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 207 baij->garray = 0; 208 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 209 ierr = MatDestroy(B);CHKERRQ(ierr); 210 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 211 baij->B = Bnew; 212 A->was_assembled = PETSC_FALSE; 213 PetscFunctionReturn(0); 214 } 215 216 /* ugly stuff added for Glenn someday we should fix this up */ 217 218 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 219 parts of the local matrix */ 220 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 221 222 223 #undef __FUNCT__ 224 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 225 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 226 { 227 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 228 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 229 PetscErrorCode ierr; 230 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 231 PetscInt *r_rmapd,*r_rmapo; 232 233 PetscFunctionBegin; 234 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 235 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 236 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 237 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 238 nt = 0; 239 for (i=0; i<inA->bmapping->n; i++) { 240 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 241 nt++; 242 r_rmapd[i] = inA->bmapping->indices[i] + 1; 243 } 244 } 245 if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 246 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 247 for (i=0; i<inA->bmapping->n; i++) { 248 if (r_rmapd[i]){ 249 for (j=0; j<bs; j++) { 250 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 251 } 252 } 253 } 254 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 255 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 256 257 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 258 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 259 for (i=0; i<B->nbs; i++) { 260 lindices[garray[i]] = i+1; 261 } 262 no = inA->bmapping->n - nt; 263 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 264 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 265 nt = 0; 266 for (i=0; i<inA->bmapping->n; i++) { 267 if (lindices[inA->bmapping->indices[i]]) { 268 nt++; 269 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 270 } 271 } 272 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 273 ierr = PetscFree(lindices);CHKERRQ(ierr); 274 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 275 for (i=0; i<inA->bmapping->n; i++) { 276 if (r_rmapo[i]){ 277 for (j=0; j<bs; j++) { 278 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 279 } 280 } 281 } 282 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 283 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 284 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 291 { 292 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 293 PetscErrorCode ierr,(*f)(Mat,Vec); 294 295 PetscFunctionBegin; 296 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 297 if (f) { 298 ierr = (*f)(A,scale);CHKERRQ(ierr); 299 } 300 PetscFunctionReturn(0); 301 } 302 303 EXTERN_C_BEGIN 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 306 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 307 { 308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 309 PetscErrorCode ierr; 310 PetscInt n,i; 311 PetscScalar *d,*o,*s; 312 313 PetscFunctionBegin; 314 if (!uglyrmapd) { 315 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 316 } 317 318 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 319 320 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 321 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 322 for (i=0; i<n; i++) { 323 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 324 } 325 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 326 /* column scale "diagonal" portion of local matrix */ 327 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 328 329 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 330 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 331 for (i=0; i<n; i++) { 332 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 333 } 334 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 335 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 336 /* column scale "off-diagonal" portion of local matrix */ 337 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 338 339 PetscFunctionReturn(0); 340 } 341 EXTERN_C_END 342 343 344