1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscErrorCode ierr; 15 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 16 PetscScalar *va,*vv; 17 Vec vB,vA; 18 const PetscScalar *vb; 19 20 PetscFunctionBegin; 21 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 22 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 23 24 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 25 if (idx) { 26 for (i=0; i<m; i++) { 27 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 28 } 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 32 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 33 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 34 35 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 36 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 37 for (i=0; i<m; i++) { 38 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 39 vv[i] = vb[i]; 40 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 41 } else { 42 vv[i] = va[i]; 43 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 44 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 45 } 46 } 47 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 48 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 49 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 50 ierr = PetscFree(idxb);CHKERRQ(ierr); 51 ierr = VecDestroy(&vA);CHKERRQ(ierr); 52 ierr = VecDestroy(&vB);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 57 { 58 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 59 PetscErrorCode ierr; 60 61 PetscFunctionBegin; 62 ierr = 
MatStoreValues(aij->A);CHKERRQ(ierr); 63 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 Local utility routine that creates a mapping from the global column 80 number to the local number in the off-diagonal part of the local 81 storage of the matrix. This is done in a non scalable way since the 82 length of colmap equals the global matrix length. 83 */ 84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 85 { 86 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 87 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 88 PetscErrorCode ierr; 89 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 90 91 PetscFunctionBegin; 92 #if defined(PETSC_USE_CTABLE) 93 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 94 for (i=0; i<nbs; i++) { 95 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 96 } 97 #else 98 ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 99 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 100 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 101 #endif 102 PetscFunctionReturn(0); 103 } 104 105 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 106 { \ 107 brow = row/bs; \ 108 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109 rmax = aimax[brow]; nrow = ailen[brow]; \ 110 bcol = col/bs; \ 111 ridx = row % bs; cidx = col % bs; \ 112 low = 0; high = nrow; \ 113 while (high-low > 3) { \ 114 t = (low+high)/2; \ 115 if (rp[t] > bcol) high = t; \ 116 else low = t; \ 117 } \ 118 for (_i=low; _i<high; _i++) { \ 119 if (rp[_i] > bcol) break; \ 120 if (rp[_i] == bcol) { \ 121 bap = ap + bs2*_i + bs*cidx + ridx; 
\ 122 if (addv == ADD_VALUES) *bap += value; \ 123 else *bap = value; \ 124 goto a_noinsert; \ 125 } \ 126 } \ 127 if (a->nonew == 1) goto a_noinsert; \ 128 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 129 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 130 N = nrow++ - 1; \ 131 /* shift up all the later entries in this row */ \ 132 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 133 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 134 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 135 rp[_i] = bcol; \ 136 ap[bs2*_i + bs*cidx + ridx] = value; \ 137 a_noinsert:; \ 138 ailen[brow] = nrow; \ 139 } 140 141 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 142 { \ 143 brow = row/bs; \ 144 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 145 rmax = bimax[brow]; nrow = bilen[brow]; \ 146 bcol = col/bs; \ 147 ridx = row % bs; cidx = col % bs; \ 148 low = 0; high = nrow; \ 149 while (high-low > 3) { \ 150 t = (low+high)/2; \ 151 if (rp[t] > bcol) high = t; \ 152 else low = t; \ 153 } \ 154 for (_i=low; _i<high; _i++) { \ 155 if (rp[_i] > bcol) break; \ 156 if (rp[_i] == bcol) { \ 157 bap = ap + bs2*_i + bs*cidx + ridx; \ 158 if (addv == ADD_VALUES) *bap += value; \ 159 else *bap = value; \ 160 goto b_noinsert; \ 161 } \ 162 } \ 163 if (b->nonew == 1) goto b_noinsert; \ 164 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 165 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 169 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 170 
ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 171 rp[_i] = bcol; \ 172 ap[bs2*_i + bs*cidx + ridx] = value; \ 173 b_noinsert:; \ 174 bilen[brow] = nrow; \ 175 } 176 177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 178 { 179 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 180 MatScalar value; 181 PetscBool roworiented = baij->roworiented; 182 PetscErrorCode ierr; 183 PetscInt i,j,row,col; 184 PetscInt rstart_orig=mat->rmap->rstart; 185 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 186 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 187 188 /* Some Variables required in the macro */ 189 Mat A = baij->A; 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 191 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 192 MatScalar *aa =a->a; 193 194 Mat B = baij->B; 195 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 196 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 197 MatScalar *ba =b->a; 198 199 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 200 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 201 MatScalar *ap,*bap; 202 203 PetscFunctionBegin; 204 for (i=0; i<m; i++) { 205 if (im[i] < 0) continue; 206 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 207 if (im[i] >= rstart_orig && im[i] < rend_orig) { 208 row = im[i] - rstart_orig; 209 for (j=0; j<n; j++) { 210 if (in[j] >= cstart_orig && in[j] < cend_orig) { 211 col = in[j] - cstart_orig; 212 if (roworiented) value = v[i*n+j]; 213 else value = v[i+j*m]; 214 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 215 } else if (in[j] < 0) continue; 216 else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 217 else { 218 if (mat->was_assembled) { 219 if (!baij->colmap) { 220 ierr = 
MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 221 } 222 #if defined(PETSC_USE_CTABLE) 223 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 224 col = col - 1; 225 #else 226 col = baij->colmap[in[j]/bs] - 1; 227 #endif 228 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 229 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 232 B = baij->B; 233 b = (Mat_SeqBAIJ*)(B)->data; 234 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 235 ba =b->a; 236 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 237 else col += in[j]%bs; 238 } else col = in[j]; 239 if (roworiented) value = v[i*n+j]; 240 else value = v[i+j*m]; 241 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 242 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243 } 244 } 245 } else { 246 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 247 if (!baij->donotstash) { 248 mat->assembled = PETSC_FALSE; 249 if (roworiented) { 250 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 251 } else { 252 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 PetscFunctionReturn(0); 258 } 259 260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 261 { 262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 263 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 264 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 265 PetscErrorCode ierr; 266 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 267 PetscBool roworiented=a->roworiented; 268 
const PetscScalar *value = v; 269 MatScalar *ap,*aa = a->a,*bap; 270 271 PetscFunctionBegin; 272 rp = aj + ai[row]; 273 ap = aa + bs2*ai[row]; 274 rmax = imax[row]; 275 nrow = ailen[row]; 276 value = v; 277 low = 0; 278 high = nrow; 279 while (high-low > 7) { 280 t = (low+high)/2; 281 if (rp[t] > col) high = t; 282 else low = t; 283 } 284 for (i=low; i<high; i++) { 285 if (rp[i] > col) break; 286 if (rp[i] == col) { 287 bap = ap + bs2*i; 288 if (roworiented) { 289 if (is == ADD_VALUES) { 290 for (ii=0; ii<bs; ii++) { 291 for (jj=ii; jj<bs2; jj+=bs) { 292 bap[jj] += *value++; 293 } 294 } 295 } else { 296 for (ii=0; ii<bs; ii++) { 297 for (jj=ii; jj<bs2; jj+=bs) { 298 bap[jj] = *value++; 299 } 300 } 301 } 302 } else { 303 if (is == ADD_VALUES) { 304 for (ii=0; ii<bs; ii++,value+=bs) { 305 for (jj=0; jj<bs; jj++) { 306 bap[jj] += value[jj]; 307 } 308 bap += bs; 309 } 310 } else { 311 for (ii=0; ii<bs; ii++,value+=bs) { 312 for (jj=0; jj<bs; jj++) { 313 bap[jj] = value[jj]; 314 } 315 bap += bs; 316 } 317 } 318 } 319 goto noinsert2; 320 } 321 } 322 if (nonew == 1) goto noinsert2; 323 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 324 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 325 N = nrow++ - 1; high++; 326 /* shift up all the later entries in this row */ 327 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 328 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 329 rp[i] = col; 330 bap = ap + bs2*i; 331 if (roworiented) { 332 for (ii=0; ii<bs; ii++) { 333 for (jj=ii; jj<bs2; jj+=bs) { 334 bap[jj] = *value++; 335 } 336 } 337 } else { 338 for (ii=0; ii<bs; ii++) { 339 for (jj=0; jj<bs; jj++) { 340 *bap++ = *value++; 341 } 342 } 343 } 344 noinsert2:; 345 ailen[row] = nrow; 346 PetscFunctionReturn(0); 347 } 348 349 /* 350 This routine should be optimized so that the block copy at ** Here a 
copy is required ** below is not needed 351 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 352 */ 353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 354 { 355 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 356 const PetscScalar *value; 357 MatScalar *barray = baij->barray; 358 PetscBool roworiented = baij->roworiented; 359 PetscErrorCode ierr; 360 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 361 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 362 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 363 364 PetscFunctionBegin; 365 if (!barray) { 366 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 367 baij->barray = barray; 368 } 369 370 if (roworiented) stepval = (n-1)*bs; 371 else stepval = (m-1)*bs; 372 373 for (i=0; i<m; i++) { 374 if (im[i] < 0) continue; 375 if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 376 if (im[i] >= rstart && im[i] < rend) { 377 row = im[i] - rstart; 378 for (j=0; j<n; j++) { 379 /* If NumCol = 1 then a copy is not required */ 380 if ((roworiented) && (n == 1)) { 381 barray = (MatScalar*)v + i*bs2; 382 } else if ((!roworiented) && (m == 1)) { 383 barray = (MatScalar*)v + j*bs2; 384 } else { /* Here a copy is required */ 385 if (roworiented) { 386 value = v + (i*(stepval+bs) + j)*bs; 387 } else { 388 value = v + (j*(stepval+bs) + i)*bs; 389 } 390 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 391 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 392 barray += bs; 393 } 394 barray -= bs2; 395 } 396 397 if (in[j] >= cstart && in[j] < cend) { 398 col = in[j] - cstart; 399 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 400 } else if (in[j] < 0) continue; 401 else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 402 else { 403 if (mat->was_assembled) { 404 if (!baij->colmap) { 405 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 406 } 407 408 #if defined(PETSC_USE_DEBUG) 409 #if defined(PETSC_USE_CTABLE) 410 { PetscInt data; 411 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 412 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 } 414 #else 415 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 416 #endif 417 #endif 418 #if defined(PETSC_USE_CTABLE) 419 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 420 col = (col - 1)/bs; 421 #else 422 col = (baij->colmap[in[j]] - 1)/bs; 423 #endif 424 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 425 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 426 col = in[j]; 427 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 428 } else col = in[j]; 429 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 430 } 431 } 432 } else { 433 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 434 if (!baij->donotstash) { 435 if (roworiented) { 436 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 437 } else { 438 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 439 } 440 } 441 } 442 } 443 PetscFunctionReturn(0); 444 } 445 446 #define HASH_KEY 0.6180339887 447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449 /* #define 
HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 451 { 452 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 453 PetscBool roworiented = baij->roworiented; 454 PetscErrorCode ierr; 455 PetscInt i,j,row,col; 456 PetscInt rstart_orig=mat->rmap->rstart; 457 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 458 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 459 PetscReal tmp; 460 MatScalar **HD = baij->hd,value; 461 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 462 463 PetscFunctionBegin; 464 for (i=0; i<m; i++) { 465 if (PetscDefined(USE_DEBUG)) { 466 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 467 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 468 } 469 row = im[i]; 470 if (row >= rstart_orig && row < rend_orig) { 471 for (j=0; j<n; j++) { 472 col = in[j]; 473 if (roworiented) value = v[i*n+j]; 474 else value = v[i+j*m]; 475 /* Look up PetscInto the Hash Table */ 476 key = (row/bs)*Nbs+(col/bs)+1; 477 h1 = HASH(size,key,tmp); 478 479 480 idx = h1; 481 if (PetscDefined(USE_DEBUG)) { 482 insert_ct++; 483 total_ct++; 484 if (HT[idx] != key) { 485 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 486 if (idx == size) { 487 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 488 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 489 } 490 } 491 } else if (HT[idx] != key) { 492 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 493 if (idx == size) { 494 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 495 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 496 } 497 } 498 /* A HASH table entry 
is found, so insert the values at the correct address */ 499 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 500 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 501 } 502 } else if (!baij->donotstash) { 503 if (roworiented) { 504 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 505 } else { 506 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 507 } 508 } 509 } 510 if (PetscDefined(USE_DEBUG)) { 511 baij->ht_total_ct += total_ct; 512 baij->ht_insert_ct += insert_ct; 513 } 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 518 { 519 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 520 PetscBool roworiented = baij->roworiented; 521 PetscErrorCode ierr; 522 PetscInt i,j,ii,jj,row,col; 523 PetscInt rstart=baij->rstartbs; 524 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 525 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 526 PetscReal tmp; 527 MatScalar **HD = baij->hd,*baij_a; 528 const PetscScalar *v_t,*value; 529 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 530 531 PetscFunctionBegin; 532 if (roworiented) stepval = (n-1)*bs; 533 else stepval = (m-1)*bs; 534 535 for (i=0; i<m; i++) { 536 if (PetscDefined(USE_DEBUG)) { 537 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 538 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 539 } 540 row = im[i]; 541 v_t = v + i*nbs2; 542 if (row >= rstart && row < rend) { 543 for (j=0; j<n; j++) { 544 col = in[j]; 545 546 /* Look up into the Hash Table */ 547 key = row*Nbs+col+1; 548 h1 = HASH(size,key,tmp); 549 550 idx = h1; 551 if (PetscDefined(USE_DEBUG)) { 552 total_ct++; 553 insert_ct++; 
554 if (HT[idx] != key) { 555 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 556 if (idx == size) { 557 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 558 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 559 } 560 } 561 } else if (HT[idx] != key) { 562 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 563 if (idx == size) { 564 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 565 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 566 } 567 } 568 baij_a = HD[idx]; 569 if (roworiented) { 570 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 571 /* value = v + (i*(stepval+bs)+j)*bs; */ 572 value = v_t; 573 v_t += bs; 574 if (addv == ADD_VALUES) { 575 for (ii=0; ii<bs; ii++,value+=stepval) { 576 for (jj=ii; jj<bs2; jj+=bs) { 577 baij_a[jj] += *value++; 578 } 579 } 580 } else { 581 for (ii=0; ii<bs; ii++,value+=stepval) { 582 for (jj=ii; jj<bs2; jj+=bs) { 583 baij_a[jj] = *value++; 584 } 585 } 586 } 587 } else { 588 value = v + j*(stepval+bs)*bs + i*bs; 589 if (addv == ADD_VALUES) { 590 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 591 for (jj=0; jj<bs; jj++) { 592 baij_a[jj] += *value++; 593 } 594 } 595 } else { 596 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 597 for (jj=0; jj<bs; jj++) { 598 baij_a[jj] = *value++; 599 } 600 } 601 } 602 } 603 } 604 } else { 605 if (!baij->donotstash) { 606 if (roworiented) { 607 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 608 } else { 609 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 610 } 611 } 612 } 613 } 614 if (PetscDefined(USE_DEBUG)) { 615 baij->ht_total_ct += total_ct; 616 baij->ht_insert_ct += insert_ct; 617 } 618 PetscFunctionReturn(0); 619 } 620 621 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar 
v[]) 622 { 623 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 624 PetscErrorCode ierr; 625 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 626 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 627 628 PetscFunctionBegin; 629 for (i=0; i<m; i++) { 630 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 631 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 632 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 633 row = idxm[i] - bsrstart; 634 for (j=0; j<n; j++) { 635 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 636 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 637 if (idxn[j] >= bscstart && idxn[j] < bscend) { 638 col = idxn[j] - bscstart; 639 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 640 } else { 641 if (!baij->colmap) { 642 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 643 } 644 #if defined(PETSC_USE_CTABLE) 645 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 646 data--; 647 #else 648 data = baij->colmap[idxn[j]/bs]-1; 649 #endif 650 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 651 else { 652 col = data + idxn[j]%bs; 653 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 654 } 655 } 656 } 657 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 663 { 664 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 665 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 666 PetscErrorCode ierr; 667 PetscInt 
i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 668 PetscReal sum = 0.0; 669 MatScalar *v; 670 671 PetscFunctionBegin; 672 if (baij->size == 1) { 673 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 674 } else { 675 if (type == NORM_FROBENIUS) { 676 v = amat->a; 677 nz = amat->nz*bs2; 678 for (i=0; i<nz; i++) { 679 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 680 } 681 v = bmat->a; 682 nz = bmat->nz*bs2; 683 for (i=0; i<nz; i++) { 684 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 685 } 686 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 687 *nrm = PetscSqrtReal(*nrm); 688 } else if (type == NORM_1) { /* max column sum */ 689 PetscReal *tmp,*tmp2; 690 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 691 ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 692 ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 693 v = amat->a; jj = amat->j; 694 for (i=0; i<amat->nz; i++) { 695 for (j=0; j<bs; j++) { 696 col = bs*(cstart + *jj) + j; /* column index */ 697 for (row=0; row<bs; row++) { 698 tmp[col] += PetscAbsScalar(*v); v++; 699 } 700 } 701 jj++; 702 } 703 v = bmat->a; jj = bmat->j; 704 for (i=0; i<bmat->nz; i++) { 705 for (j=0; j<bs; j++) { 706 col = bs*garray[*jj] + j; 707 for (row=0; row<bs; row++) { 708 tmp[col] += PetscAbsScalar(*v); v++; 709 } 710 } 711 jj++; 712 } 713 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 *nrm = 0.0; 715 for (j=0; j<mat->cmap->N; j++) { 716 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 717 } 718 ierr = PetscFree(tmp);CHKERRQ(ierr); 719 ierr = PetscFree(tmp2);CHKERRQ(ierr); 720 } else if (type == NORM_INFINITY) { /* max row sum */ 721 PetscReal *sums; 722 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 723 sum = 0.0; 724 for (j=0; j<amat->mbs; j++) { 725 for (row=0; row<bs; row++) sums[row] = 0.0; 726 v = amat->a + bs2*amat->i[j]; 727 nz = amat->i[j+1]-amat->i[j]; 728 for (i=0; i<nz; i++) { 729 for (col=0; 
col<bs; col++) { 730 for (row=0; row<bs; row++) { 731 sums[row] += PetscAbsScalar(*v); v++; 732 } 733 } 734 } 735 v = bmat->a + bs2*bmat->i[j]; 736 nz = bmat->i[j+1]-bmat->i[j]; 737 for (i=0; i<nz; i++) { 738 for (col=0; col<bs; col++) { 739 for (row=0; row<bs; row++) { 740 sums[row] += PetscAbsScalar(*v); v++; 741 } 742 } 743 } 744 for (row=0; row<bs; row++) { 745 if (sums[row] > sum) sum = sums[row]; 746 } 747 } 748 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 749 ierr = PetscFree(sums);CHKERRQ(ierr); 750 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 751 } 752 PetscFunctionReturn(0); 753 } 754 755 /* 756 Creates the hash table, and sets the table 757 This table is created only once. 758 If new entried need to be added to the matrix 759 then the hash table has to be destroyed and 760 recreated. 761 */ 762 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 763 { 764 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 765 Mat A = baij->A,B=baij->B; 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 767 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 768 PetscErrorCode ierr; 769 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 770 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 771 PetscInt *HT,key; 772 MatScalar **HD; 773 PetscReal tmp; 774 #if defined(PETSC_USE_INFO) 775 PetscInt ct=0,max=0; 776 #endif 777 778 PetscFunctionBegin; 779 if (baij->ht) PetscFunctionReturn(0); 780 781 baij->ht_size = (PetscInt)(factor*nz); 782 ht_size = baij->ht_size; 783 784 /* Allocate Memory for Hash Table */ 785 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 786 HD = baij->hd; 787 HT = baij->ht; 788 789 /* Loop Over A */ 790 for (i=0; i<a->mbs; i++) { 791 for (j=ai[i]; j<ai[i+1]; j++) { 792 row = i+rstart; 793 col = aj[j]+cstart; 794 795 key = row*Nbs + col + 1; 796 
h1 = HASH(ht_size,key,tmp); 797 for (k=0; k<ht_size; k++) { 798 if (!HT[(h1+k)%ht_size]) { 799 HT[(h1+k)%ht_size] = key; 800 HD[(h1+k)%ht_size] = a->a + j*bs2; 801 break; 802 #if defined(PETSC_USE_INFO) 803 } else { 804 ct++; 805 #endif 806 } 807 } 808 #if defined(PETSC_USE_INFO) 809 if (k> max) max = k; 810 #endif 811 } 812 } 813 /* Loop Over B */ 814 for (i=0; i<b->mbs; i++) { 815 for (j=bi[i]; j<bi[i+1]; j++) { 816 row = i+rstart; 817 col = garray[bj[j]]; 818 key = row*Nbs + col + 1; 819 h1 = HASH(ht_size,key,tmp); 820 for (k=0; k<ht_size; k++) { 821 if (!HT[(h1+k)%ht_size]) { 822 HT[(h1+k)%ht_size] = key; 823 HD[(h1+k)%ht_size] = b->a + j*bs2; 824 break; 825 #if defined(PETSC_USE_INFO) 826 } else { 827 ct++; 828 #endif 829 } 830 } 831 #if defined(PETSC_USE_INFO) 832 if (k> max) max = k; 833 #endif 834 } 835 } 836 837 /* Print Summary */ 838 #if defined(PETSC_USE_INFO) 839 for (i=0,j=0; i<ht_size; i++) { 840 if (HT[i]) j++; 841 } 842 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*
   Begins assembly: launches the scatters that ship stashed off-process entries
   (both the scalar stash and the block stash) to their owning ranks.
   Collective; a no-op when stashing is disabled.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so there is nothing to communicate */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Completes assembly: drains the scalar and block stashes into the local
   diagonal (A) and off-diagonal (B) submatrices, handles collective
   disassembly bookkeeping, and finalizes the communication structures.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a  =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each stashed column carries bs2 scalars */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* switch to hash-table based MatSetValues when the user requested it */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode
MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  /* Views the matrix on an ASCII, draw, or socket viewer.  INFO formats print
     summaries per rank; otherwise the entire matrix is gathered onto rank 0
     and viewed sequentially.  Collective on the matrix's communicator. */
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global row indices of block-row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ       *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ       *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;
  PetscInt          header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt          *rowlens,*colidxs;
  PetscScalar       *matvals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* local scalar (non-block) nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] =
M; header[2] = N; header[3] = nz; 1125 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1126 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1127 1128 /* fill in and store row lengths */ 1129 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1130 for (cnt=0, i=0; i<A->mbs; i++) 1131 for (j=0; j<bs; j++) 1132 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1133 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1134 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1135 1136 /* fill in and store column indices */ 1137 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1138 for (cnt=0, i=0; i<A->mbs; i++) { 1139 for (k=0; k<bs; k++) { 1140 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1141 if (garray[B->j[jb]] > cs/bs) break; 1142 for (l=0; l<bs; l++) 1143 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1144 } 1145 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1146 for (l=0; l<bs; l++) 1147 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1148 for (; jb<B->i[i+1]; jb++) 1149 for (l=0; l<bs; l++) 1150 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1151 } 1152 } 1153 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1154 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1155 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1156 1157 /* fill in and store nonzero values */ 1158 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1159 for (cnt=0, i=0; i<A->mbs; i++) { 1160 for (k=0; k<bs; k++) { 1161 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1162 if (garray[B->j[jb]] > cs/bs) break; 1163 for (l=0; l<bs; l++) 1164 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1165 } 1166 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1167 for (l=0; l<bs; l++) 1168 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1169 for (; jb<B->i[i+1]; jb++) 1170 for (l=0; l<bs; l++) 1171 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1172 } 1173 } 1174 ierr = 
PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1175 ierr = PetscFree(matvals);CHKERRQ(ierr); 1176 1177 /* write block size option to the viewer's .info file */ 1178 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1183 { 1184 PetscErrorCode ierr; 1185 PetscBool iascii,isdraw,issocket,isbinary; 1186 1187 PetscFunctionBegin; 1188 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1189 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1190 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1191 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192 if (iascii || isdraw || issocket) { 1193 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1194 } else if (isbinary) { 1195 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1201 { 1202 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 #if defined(PETSC_USE_LOG) 1207 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1208 #endif 1209 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1210 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1211 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1212 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1213 #if defined(PETSC_USE_CTABLE) 1214 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1215 #else 1216 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1217 #endif 1218 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1219 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1220 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1221 ierr = 
PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1222 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1223 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1224 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1225 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1226 1227 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1236 #if defined(PETSC_HAVE_HYPRE) 1237 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 1238 #endif 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1246 PetscErrorCode ierr; 1247 PetscInt nt; 1248 1249 PetscFunctionBegin; 1250 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1251 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1252 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1253 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1254 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* local multiply overlaps the communication started above */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   z = y + A x, using the same communication/computation overlap as MatMult.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   y = A^T x.  The off-diagonal contribution is computed into the local work
   vector and then scattered back (reverse, additive) onto the owning ranks.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   z = y + A^T x, analogous to MatMultTranspose with an additive local part.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A = aa*A: scales both the diagonal and off-diagonal submatrices */
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Returns one locally owned (global-numbered) row: merges the diagonal-block
   and off-diagonal-block entries into globally sorted column order, using
   scratch arrays sized for the longest local row.  Must be paired with
   MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* first B entry at/after the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode
MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  /* Releases the row handed out by MatGetRow (scratch buffers are reused, so
     only the active flag needs to be cleared). */
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Zeros all entries while keeping the nonzero structure of both submatrices */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Gathers matrix statistics: sums the diagonal and off-diagonal submatrix
   counters locally, then reduces across ranks for the GLOBAL_MAX/SUM flavors.
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   Sets a matrix option, forwarding to the sequential submatrices where the
   option applies to them.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* hash-table over-allocation factor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    /* symmetry flags only apply to the (square) diagonal block */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   Forms the (parallel) transpose by re-inserting each block column-by-column
   into a new matrix with swapped layout.  Preallocation of the result is not
   known, so insertion may allocate.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals holds the bs consecutive global row indices of block-row i;
       they become column indices of the transpose */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr =
MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the freshly built transpose */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   mat = diag(ll) * mat * diag(rr).  The right scaling of the off-diagonal
   block needs the ghosted rr values, so their scatter is overlapped with the
   left scaling and the diagonal-block scaling.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Zeros the given global rows (rows may be owned by any rank), optionally
   placing 'diag' on the diagonal and fixing the right-hand side b from x.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: the diagonal entry may live outside the diagonal
       block, so insert it via MatSetValues (requires new nonzeros allowed) */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Zeros the given global rows AND the matching columns, optionally placing
   'diag' on the diagonal and adjusting b.  Row ownership is resolved with a
   PetscSF reduction; column zeroing of the off-diagonal block uses a scattered
   0/1 mask.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt          bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr =
PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  /* build a 0/1 mask of zeroed rows, then scatter it so every rank knows
     which ghost columns must be zeroed */
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghosted x values are needed to fix up b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* stride bs walks one scalar row across the column-major bs x bs blocks */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Marks the matrix as unfactored (only the diagonal block carries factor state) */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

/*
   Tests A == B by comparing the local diagonal and off-diagonal submatrices,
   then combining the per-rank results with a logical-AND allreduce.
*/
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Copies A into B; falls back to the generic copy unless both matrices share
   the same nonzero pattern and copy implementation.
*/
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default block-row estimates */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes per-block-row nonzero counts for the union of the patterns of X
   and Y (local SeqBAIJ pieces), used to preallocate Y+a*X with differing
   patterns.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
  Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
  Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Y = Y + a*X.  With identical nonzero patterns this reduces to BLAS axpy on
   the raw value arrays of both submatrices.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str ==
SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1868 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1869 } else { 1870 Mat B; 1871 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1872 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1873 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1874 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1875 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1876 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1877 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1878 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1879 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1880 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1881 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1882 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1883 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1884 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1885 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1886 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1892 { 1893 #if defined(PETSC_USE_COMPLEX) 1894 PetscErrorCode ierr; 1895 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1896 1897 PetscFunctionBegin; 1898 ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr); 1899 ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr); 1900 #else 1901 PetscFunctionBegin; 1902 #endif 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1907 { 1908 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1909 PetscErrorCode ierr; 1910 1911 PetscFunctionBegin; 1912 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1913 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1918 { 1919 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1920 PetscErrorCode ierr; 1921 1922 PetscFunctionBegin; 1923 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1924 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1929 { 1930 PetscErrorCode ierr; 1931 IS iscol_local; 1932 PetscInt csize; 1933 1934 PetscFunctionBegin; 1935 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1936 if (call == MAT_REUSE_MATRIX) { 1937 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1938 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1939 } else { 1940 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1941 } 1942 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1943 if (call == MAT_INITIAL_MATRIX) { 
1944 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1945 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 /* 1951 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1952 in local and then by concatenating the local matrices the end result. 1953 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1954 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1955 */ 1956 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1957 { 1958 PetscErrorCode ierr; 1959 PetscMPIInt rank,size; 1960 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1961 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1962 Mat M,Mreuse; 1963 MatScalar *vwork,*aa; 1964 MPI_Comm comm; 1965 IS isrow_new, iscol_new; 1966 Mat_SeqBAIJ *aij; 1967 1968 PetscFunctionBegin; 1969 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1970 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 1971 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1972 /* The compression and expansion should be avoided. 
Doesn't point 1973 out errors, might change the indices, hence buggey */ 1974 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1975 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1976 1977 if (call == MAT_REUSE_MATRIX) { 1978 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1979 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1980 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 1981 } else { 1982 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 1983 } 1984 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1985 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1986 /* 1987 m - number of local rows 1988 n - number of columns (same on all processors) 1989 rstart - first row in new global matrix generated 1990 */ 1991 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1992 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1993 m = m/bs; 1994 n = n/bs; 1995 1996 if (call == MAT_INITIAL_MATRIX) { 1997 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1998 ii = aij->i; 1999 jj = aij->j; 2000 2001 /* 2002 Determine the number of non-zeros in the diagonal and off-diagonal 2003 portions of the matrix in order to do correct preallocation 2004 */ 2005 2006 /* first get start and end of "diagonal" columns */ 2007 if (csize == PETSC_DECIDE) { 2008 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2009 if (mglobal == n*bs) { /* square matrix */ 2010 nlocal = m; 2011 } else { 2012 nlocal = n/size + ((n % size) > rank); 2013 } 2014 } else { 2015 nlocal = csize/bs; 2016 } 2017 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 2018 rstart = rend - nlocal; 2019 if (rank == size - 1 && rend != n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2020 2021 /* next, compute all the lengths */ 2022 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2023 for (i=0; i<m; i++) { 2024 jend = ii[i+1] - ii[i]; 2025 olen = 0; 2026 dlen = 0; 2027 for (j=0; j<jend; j++) { 2028 if (*jj < rstart || *jj >= rend) olen++; 2029 else dlen++; 2030 jj++; 2031 } 2032 olens[i] = olen; 2033 dlens[i] = dlen; 2034 } 2035 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2036 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2037 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2038 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2039 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2040 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2041 } else { 2042 PetscInt ml,nl; 2043 2044 M = *newmat; 2045 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2046 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2047 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2048 /* 2049 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2050 rather than the slower MatSetValues(). 
2051 */ 2052 M->was_assembled = PETSC_TRUE; 2053 M->assembled = PETSC_FALSE; 2054 } 2055 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2056 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2057 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2058 ii = aij->i; 2059 jj = aij->j; 2060 aa = aij->a; 2061 for (i=0; i<m; i++) { 2062 row = rstart/bs + i; 2063 nz = ii[i+1] - ii[i]; 2064 cwork = jj; jj += nz; 2065 vwork = aa; aa += nz*bs*bs; 2066 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2067 } 2068 2069 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 *newmat = M; 2072 2073 /* save submatrix used in processor for next request */ 2074 if (call == MAT_INITIAL_MATRIX) { 2075 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2076 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2082 { 2083 MPI_Comm comm,pcomm; 2084 PetscInt clocal_size,nrows; 2085 const PetscInt *rows; 2086 PetscMPIInt size; 2087 IS crowp,lcolp; 2088 PetscErrorCode ierr; 2089 2090 PetscFunctionBegin; 2091 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2092 /* make a collective version of 'rowp' */ 2093 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2094 if (pcomm==comm) { 2095 crowp = rowp; 2096 } else { 2097 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2098 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2099 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2100 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2101 } 2102 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2103 /* make a local version of 'colp' */ 2104 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2105 ierr = 
MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr); 2106 if (size==1) { 2107 lcolp = colp; 2108 } else { 2109 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2110 } 2111 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2112 /* now we just get the submatrix */ 2113 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2114 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2115 /* clean up */ 2116 if (pcomm!=comm) { 2117 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2118 } 2119 if (size>1) { 2120 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2121 } 2122 PetscFunctionReturn(0); 2123 } 2124 2125 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2126 { 2127 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2128 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2129 2130 PetscFunctionBegin; 2131 if (nghosts) *nghosts = B->nbs; 2132 if (ghosts) *ghosts = baij->garray; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2137 { 2138 Mat B; 2139 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2140 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2141 Mat_SeqAIJ *b; 2142 PetscErrorCode ierr; 2143 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2144 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2145 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2146 2147 PetscFunctionBegin; 2148 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 2149 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 2150 2151 /* ---------------------------------------------------------------- 2152 Tell every processor the number of nonzeros per row 2153 */ 2154 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2155 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2156 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + 
bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2157 } 2158 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2159 displs = recvcounts + size; 2160 for (i=0; i<size; i++) { 2161 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2162 displs[i] = A->rmap->range[i]/bs; 2163 } 2164 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2165 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2166 #else 2167 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2168 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2169 #endif 2170 /* --------------------------------------------------------------- 2171 Create the sequential matrix of the same type as the local block diagonal 2172 */ 2173 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2174 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2175 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2176 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2177 b = (Mat_SeqAIJ*)B->data; 2178 2179 /*-------------------------------------------------------------------- 2180 Copy my part of matrix column indices over 2181 */ 2182 sendcount = ad->nz + bd->nz; 2183 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2184 a_jsendbuf = ad->j; 2185 b_jsendbuf = bd->j; 2186 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2187 cnt = 0; 2188 for (i=0; i<n; i++) { 2189 2190 /* put in lower diagonal portion */ 2191 m = bd->i[i+1] - bd->i[i]; 2192 while (m > 0) { 2193 /* is it above diagonal (in bd (compressed) numbering) */ 2194 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2195 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2196 m--; 2197 } 2198 2199 /* put in diagonal portion */ 2200 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2201 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2202 } 2203 2204 /* put in 
upper diagonal portion */ 2205 while (m-- > 0) { 2206 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2207 } 2208 } 2209 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2210 2211 /*-------------------------------------------------------------------- 2212 Gather all column indices to all processors 2213 */ 2214 for (i=0; i<size; i++) { 2215 recvcounts[i] = 0; 2216 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2217 recvcounts[i] += lens[j]; 2218 } 2219 } 2220 displs[0] = 0; 2221 for (i=1; i<size; i++) { 2222 displs[i] = displs[i-1] + recvcounts[i-1]; 2223 } 2224 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2225 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2226 #else 2227 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2228 #endif 2229 /*-------------------------------------------------------------------- 2230 Assemble the matrix into useable form (note numerical values not yet set) 2231 */ 2232 /* set the b->ilen (length of each row) values */ 2233 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2234 /* set the b->i indices */ 2235 b->i[0] = 0; 2236 for (i=1; i<=A->rmap->N/bs; i++) { 2237 b->i[i] = b->i[i-1] + lens[i-1]; 2238 } 2239 ierr = PetscFree(lens);CHKERRQ(ierr); 2240 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2241 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2243 2244 if (A->symmetric) { 2245 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2246 } else if (A->hermitian) { 2247 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2248 } else if (A->structurally_symmetric) { 2249 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2250 } 2251 *newmat = B; 2252 
PetscFunctionReturn(0); 2253 } 2254 2255 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2256 { 2257 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2258 PetscErrorCode ierr; 2259 Vec bb1 = NULL; 2260 2261 PetscFunctionBegin; 2262 if (flag == SOR_APPLY_UPPER) { 2263 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2268 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2269 } 2270 2271 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2272 if (flag & SOR_ZERO_INITIAL_GUESS) { 2273 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2274 its--; 2275 } 2276 2277 while (its--) { 2278 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2279 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2280 2281 /* update rhs: bb1 = bb - B*x */ 2282 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2283 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2284 2285 /* local sweep */ 2286 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2287 } 2288 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2289 if (flag & SOR_ZERO_INITIAL_GUESS) { 2290 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2291 its--; 2292 } 2293 while (its--) { 2294 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2295 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2296 2297 /* update rhs: bb1 = bb - B*x */ 2298 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2299 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2300 2301 /* local sweep */ 2302 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2303 } 2304 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2305 if (flag & SOR_ZERO_INITIAL_GUESS) { 2306 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2307 its--; 2308 } 2309 while (its--) { 2310 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2311 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2312 2313 /* update rhs: bb1 = bb - B*x */ 2314 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2315 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2316 2317 /* local sweep */ 2318 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2319 } 2320 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2321 2322 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2327 { 2328 PetscErrorCode ierr; 2329 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2330 PetscInt N,i,*garray = aij->garray; 2331 PetscInt ib,jb,bs = A->rmap->bs; 2332 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2333 MatScalar *a_val = a_aij->a; 2334 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2335 MatScalar *b_val = b_aij->a; 2336 PetscReal *work; 2337 2338 PetscFunctionBegin; 2339 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2340 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2341 if (type == NORM_2) { 2342 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2343 for (jb=0; jb<bs; jb++) { 2344 for (ib=0; ib<bs; ib++) { 2345 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2346 a_val++; 2347 } 2348 } 2349 } 2350 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2351 for (jb=0; jb<bs; jb++) { 2352 for (ib=0; ib<bs; ib++) { 
2353 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2354 b_val++; 2355 } 2356 } 2357 } 2358 } else if (type == NORM_1) { 2359 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2360 for (jb=0; jb<bs; jb++) { 2361 for (ib=0; ib<bs; ib++) { 2362 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2363 a_val++; 2364 } 2365 } 2366 } 2367 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2368 for (jb=0; jb<bs; jb++) { 2369 for (ib=0; ib<bs; ib++) { 2370 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2371 b_val++; 2372 } 2373 } 2374 } 2375 } else if (type == NORM_INFINITY) { 2376 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2377 for (jb=0; jb<bs; jb++) { 2378 for (ib=0; ib<bs; ib++) { 2379 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2380 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2381 a_val++; 2382 } 2383 } 2384 } 2385 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2386 for (jb=0; jb<bs; jb++) { 2387 for (ib=0; ib<bs; ib++) { 2388 int col = garray[b_aij->j[i]] * bs + jb; 2389 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2390 b_val++; 2391 } 2392 } 2393 } 2394 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2395 if (type == NORM_INFINITY) { 2396 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2397 } else { 2398 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree(work);CHKERRQ(ierr); 2401 if (type == NORM_2) { 2402 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2403 } 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2408 { 2409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2414 A->factorerrortype = a->A->factorerrortype; 2415 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2416 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2421 { 2422 PetscErrorCode ierr; 2423 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2424 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2425 2426 PetscFunctionBegin; 2427 if (!Y->preallocated) { 2428 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2429 } else if (!aij->nz) { 2430 PetscInt nonew = aij->nonew; 2431 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2432 aij->nonew = nonew; 2433 } 2434 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2439 { 2440 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2441 PetscErrorCode ierr; 2442 2443 PetscFunctionBegin; 2444 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2445 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2446 if (d) { 2447 PetscInt rstart; 2448 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2449 *d += rstart/A->rmap->bs; 2450 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2456 { 2457 PetscFunctionBegin; 2458 *a = ((Mat_MPIBAIJ*)A->data)->A; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 /* -------------------------------------------------------------------*/ 2463 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2464 MatGetRow_MPIBAIJ, 2465 MatRestoreRow_MPIBAIJ, 2466 MatMult_MPIBAIJ, 2467 /* 4*/ MatMultAdd_MPIBAIJ, 2468 MatMultTranspose_MPIBAIJ, 2469 MatMultTransposeAdd_MPIBAIJ, 2470 NULL, 2471 NULL, 2472 NULL, 2473 /*10*/ NULL, 2474 NULL, 2475 NULL, 2476 
MatSOR_MPIBAIJ, 2477 MatTranspose_MPIBAIJ, 2478 /*15*/ MatGetInfo_MPIBAIJ, 2479 MatEqual_MPIBAIJ, 2480 MatGetDiagonal_MPIBAIJ, 2481 MatDiagonalScale_MPIBAIJ, 2482 MatNorm_MPIBAIJ, 2483 /*20*/ MatAssemblyBegin_MPIBAIJ, 2484 MatAssemblyEnd_MPIBAIJ, 2485 MatSetOption_MPIBAIJ, 2486 MatZeroEntries_MPIBAIJ, 2487 /*24*/ MatZeroRows_MPIBAIJ, 2488 NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 /*29*/ MatSetUp_MPIBAIJ, 2493 NULL, 2494 NULL, 2495 MatGetDiagonalBlock_MPIBAIJ, 2496 NULL, 2497 /*34*/ MatDuplicate_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*39*/ MatAXPY_MPIBAIJ, 2503 MatCreateSubMatrices_MPIBAIJ, 2504 MatIncreaseOverlap_MPIBAIJ, 2505 MatGetValues_MPIBAIJ, 2506 MatCopy_MPIBAIJ, 2507 /*44*/ NULL, 2508 MatScale_MPIBAIJ, 2509 MatShift_MPIBAIJ, 2510 NULL, 2511 MatZeroRowsColumns_MPIBAIJ, 2512 /*49*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*54*/ MatFDColoringCreate_MPIXAIJ, 2518 NULL, 2519 MatSetUnfactored_MPIBAIJ, 2520 MatPermute_MPIBAIJ, 2521 MatSetValuesBlocked_MPIBAIJ, 2522 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2523 MatDestroy_MPIBAIJ, 2524 MatView_MPIBAIJ, 2525 NULL, 2526 NULL, 2527 /*64*/ NULL, 2528 NULL, 2529 NULL, 2530 NULL, 2531 NULL, 2532 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2533 NULL, 2534 NULL, 2535 NULL, 2536 NULL, 2537 /*74*/ NULL, 2538 MatFDColoringApply_BAIJ, 2539 NULL, 2540 NULL, 2541 NULL, 2542 /*79*/ NULL, 2543 NULL, 2544 NULL, 2545 NULL, 2546 MatLoad_MPIBAIJ, 2547 /*84*/ NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 NULL, 2552 /*89*/ NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 NULL, 2557 /*94*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*99*/ NULL, 2563 NULL, 2564 NULL, 2565 MatConjugate_MPIBAIJ, 2566 NULL, 2567 /*104*/NULL, 2568 MatRealPart_MPIBAIJ, 2569 MatImaginaryPart_MPIBAIJ, 2570 NULL, 2571 NULL, 2572 /*109*/NULL, 2573 NULL, 2574 NULL, 2575 NULL, 2576 MatMissingDiagonal_MPIBAIJ, 2577 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2578 NULL, 2579 MatGetGhosts_MPIBAIJ, 2580 NULL, 2581 NULL, 2582 /*119*/NULL, 2583 NULL, 
2584 NULL, 2585 NULL, 2586 MatGetMultiProcBlock_MPIBAIJ, 2587 /*124*/NULL, 2588 MatGetColumnNorms_MPIBAIJ, 2589 MatInvertBlockDiagonal_MPIBAIJ, 2590 NULL, 2591 NULL, 2592 /*129*/ NULL, 2593 NULL, 2594 NULL, 2595 NULL, 2596 NULL, 2597 /*134*/ NULL, 2598 NULL, 2599 NULL, 2600 NULL, 2601 NULL, 2602 /*139*/ MatSetBlockSizes_Default, 2603 NULL, 2604 NULL, 2605 MatFDColoringSetUp_MPIXAIJ, 2606 NULL, 2607 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2608 }; 2609 2610 2611 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2612 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2613 2614 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2615 { 2616 PetscInt m,rstart,cstart,cend; 2617 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2618 const PetscInt *JJ =NULL; 2619 PetscScalar *values=NULL; 2620 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2621 PetscErrorCode ierr; 2622 PetscBool nooffprocentries; 2623 2624 PetscFunctionBegin; 2625 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2626 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2627 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2628 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2629 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2630 m = B->rmap->n/bs; 2631 rstart = B->rmap->rstart/bs; 2632 cstart = B->cmap->rstart/bs; 2633 cend = B->cmap->rend/bs; 2634 2635 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2636 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2637 for (i=0; i<m; i++) { 2638 nz = ii[i+1] - ii[i]; 2639 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2640 nz_max = PetscMax(nz_max,nz); 2641 dlen = 0; 2642 olen = 0; 2643 JJ = jj + ii[i]; 2644 for (j=0; j<nz; j++) { 2645 if 
(*JJ < cstart || *JJ >= cend) olen++; 2646 else dlen++; 2647 JJ++; 2648 } 2649 d_nnz[i] = dlen; 2650 o_nnz[i] = olen; 2651 } 2652 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2653 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2654 2655 values = (PetscScalar*)V; 2656 if (!values) { 2657 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2658 } 2659 for (i=0; i<m; i++) { 2660 PetscInt row = i + rstart; 2661 PetscInt ncols = ii[i+1] - ii[i]; 2662 const PetscInt *icols = jj + ii[i]; 2663 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2664 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2665 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2666 } else { /* block ordering does not match so we can only insert one block at a time. */ 2667 PetscInt j; 2668 for (j=0; j<ncols; j++) { 2669 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2670 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2671 } 2672 } 2673 } 2674 2675 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2676 nooffprocentries = B->nooffprocentries; 2677 B->nooffprocentries = PETSC_TRUE; 2678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 B->nooffprocentries = nooffprocentries; 2681 2682 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2683 PetscFunctionReturn(0); 2684 } 2685 2686 /*@C 2687 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2688 2689 Collective 2690 2691 Input Parameters: 2692 + B - the matrix 2693 . bs - the block size 2694 . i - the indices into j for the start of each local row (starts with zero) 2695 . 
   j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
   may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
   MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on the object (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Implementation of MatMPIBAIJSetPreallocation(): validates the nnz arrays,
   records the block-size-derived sizes in the Mat_MPIBAIJ struct, discards any
   previous colmap/garray/scatter state, and (re)creates and preallocates the
   diagonal (A) and off-diagonal (B) sequential BAIJ pieces.
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache sizes measured in blocks */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocessor case has an empty off-diagonal part */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

/*
   Converts an MPIBAIJ matrix to an MPIAdj graph of its block structure:
   one vertex per block row, one edge per nonzero block, diagonal blocks
   dropped, columns of each row emitted in increasing global block order.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal columns left of the diagonal part (garray maps to global block indices) */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns, skipping the diagonal block itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal columns right of the diagonal part */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

/*
   Converts an assembled MPIBAIJ matrix to MPIAIJ by converting its sequential
   diagonal and off-diagonal pieces; A is disassembled and reassembled around
   the initial conversion so the off-diagonal piece uses global column numbering.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore A to its assembled state after the disassembly above */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact>

   Level: beginner

   Notes:
    MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
   In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateBAIJ
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);

/*
   Constructor for MATMPIBAIJ: installs the function table, zero-initializes the
   per-process Mat_MPIBAIJ data, composes the type-specific conversion and
   preallocation methods, and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose type-specific methods looked up by name via PetscTryMethod()/PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39; /* fall back to the default for nonsensical factors */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on the object (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square.
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3107 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3108 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3109 This value should be the same as the local size used in creating the 3110 y vector for the matrix-vector product y = Ax. 3111 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3112 This value should be the same as the local size used in creating the 3113 x vector for the matrix-vector product y = Ax. 3114 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3115 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3116 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3117 submatrix (same for all local rows) 3118 . d_nnz - array containing the number of nonzero blocks in the various block rows 3119 of the in diagonal portion of the local (possibly different for each block 3120 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3121 and set it even if it is zero. 3122 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3123 submatrix (same for all local rows). 3124 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3125 off-diagonal portion of the local submatrix (possibly different for 3126 each block row) or NULL. 3127 3128 Output Parameter: 3129 . A - the matrix 3130 3131 Options Database Keys: 3132 + -mat_block_size - size of the blocks to use 3133 - -mat_use_hash_table <fact> 3134 3135 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3136 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
3137 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3138 3139 Notes: 3140 If the *_nnz parameter is given then the *_nz parameter is ignored 3141 3142 A nonzero block is any block that as 1 or more nonzeros in it 3143 3144 The user MUST specify either the local or global matrix dimensions 3145 (possibly both). 3146 3147 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3148 than it must be used on all processors that share the object for that argument. 3149 3150 Storage Information: 3151 For a square global matrix we define each processor's diagonal portion 3152 to be its local rows and the corresponding columns (a square submatrix); 3153 each processor's off-diagonal portion encompasses the remainder of the 3154 local matrix (a rectangular submatrix). 3155 3156 The user can specify preallocated storage for the diagonal part of 3157 the local submatrix with either d_nz or d_nnz (not both). Set 3158 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3159 memory allocation. Likewise, specify preallocated storage for the 3160 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3161 3162 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3163 the figure below we depict these three local rows and all columns (0-11). 3164 3165 .vb 3166 0 1 2 3 4 5 6 7 8 9 10 11 3167 -------------------------- 3168 row 3 |o o o d d d o o o o o o 3169 row 4 |o o o d d d o o o o o o 3170 row 5 |o o o d d d o o o o o o 3171 -------------------------- 3172 .ve 3173 3174 Thus, any entries in the d locations are stored in the d (diagonal) 3175 submatrix, and any entries in the o locations are stored in the 3176 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3177 stored simply in the MATSEQBAIJ format for compressed row storage. 
3178 3179 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3180 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3181 In general, for PDE problems in which most nonzeros are near the diagonal, 3182 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3183 or you will get TERRIBLE performance; see the users' manual chapter on 3184 matrices. 3185 3186 Level: intermediate 3187 3188 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3189 @*/ 3190 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3191 { 3192 PetscErrorCode ierr; 3193 PetscMPIInt size; 3194 3195 PetscFunctionBegin; 3196 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3197 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3198 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3199 if (size > 1) { 3200 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3201 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3202 } else { 3203 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3204 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3205 } 3206 PetscFunctionReturn(0); 3207 } 3208 3209 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3210 { 3211 Mat mat; 3212 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3213 PetscErrorCode ierr; 3214 PetscInt len=0; 3215 3216 PetscFunctionBegin; 3217 *newmat = NULL; 3218 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3219 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3220 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3221 3222 mat->factortype = matin->factortype; 3223 mat->preallocated = PETSC_TRUE; 3224 
mat->assembled = PETSC_TRUE; 3225 mat->insertmode = NOT_SET_VALUES; 3226 3227 a = (Mat_MPIBAIJ*)mat->data; 3228 mat->rmap->bs = matin->rmap->bs; 3229 a->bs2 = oldmat->bs2; 3230 a->mbs = oldmat->mbs; 3231 a->nbs = oldmat->nbs; 3232 a->Mbs = oldmat->Mbs; 3233 a->Nbs = oldmat->Nbs; 3234 3235 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3236 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3237 3238 a->size = oldmat->size; 3239 a->rank = oldmat->rank; 3240 a->donotstash = oldmat->donotstash; 3241 a->roworiented = oldmat->roworiented; 3242 a->rowindices = NULL; 3243 a->rowvalues = NULL; 3244 a->getrowactive = PETSC_FALSE; 3245 a->barray = NULL; 3246 a->rstartbs = oldmat->rstartbs; 3247 a->rendbs = oldmat->rendbs; 3248 a->cstartbs = oldmat->cstartbs; 3249 a->cendbs = oldmat->cendbs; 3250 3251 /* hash table stuff */ 3252 a->ht = NULL; 3253 a->hd = NULL; 3254 a->ht_size = 0; 3255 a->ht_flag = oldmat->ht_flag; 3256 a->ht_fact = oldmat->ht_fact; 3257 a->ht_total_ct = 0; 3258 a->ht_insert_ct = 0; 3259 3260 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3261 if (oldmat->colmap) { 3262 #if defined(PETSC_USE_CTABLE) 3263 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3264 #else 3265 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3266 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3267 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3268 #endif 3269 } else a->colmap = NULL; 3270 3271 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3272 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3273 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3274 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3275 } else a->garray = NULL; 3276 3277 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3278 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3280 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3281 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3282 3283 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3284 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3285 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3286 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3287 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3288 *newmat = mat; 3289 PetscFunctionReturn(0); 3290 } 3291 3292 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3293 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3294 { 3295 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3296 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3297 PetscScalar *matvals; 3298 PetscErrorCode ierr; 3299 3300 PetscFunctionBegin; 3301 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3302 3303 /* read in matrix header */ 3304 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3305 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3306 M = header[1]; N = header[2]; nz = header[3]; 3307 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3308 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3309 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3310 3311 /* set block sizes from the viewer's .info file */ 3312 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3313 
/* set local sizes if not set already */ 3314 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3315 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3316 /* set global sizes if not set already */ 3317 if (mat->rmap->N < 0) mat->rmap->N = M; 3318 if (mat->cmap->N < 0) mat->cmap->N = N; 3319 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3320 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3321 3322 /* check if the matrix sizes are correct */ 3323 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3324 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3325 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3326 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3327 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL); 3328 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce); 3329 mbs = m/bs; nbs = n/bs; 3330 3331 /* read in row lengths and build row indices */ 3332 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3333 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3334 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3335 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3336 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3337 3338 /* read in column indices and matrix values */ 3339 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3340 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3341 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3342 3343 { /* preallocate matrix storage */ 3344 PetscBT bt; /* helper bit set to count diagonal 
nonzeros */
    PetscHSetI ht;           /* helper hash set to count off-diagonal (block) nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    /* count the distinct block columns touched by each local block row */
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {          /* scan all bs point rows of block row i */
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* SBAIJ stores only the upper triangle */
            if (col >= cs && col < ce) {
              /* diagonal block: bit set dedups block columns within this block row */
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              /* off-diagonal block: hash set dedups global block columns */
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* only the call matching the actual matrix type takes effect; the other is a no-op */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatLoad for MPIBAIJ: dispatches to the binary reader; other viewer types are unsupported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
     This can also be set by the command line option: -mat_use_hash_table <fact>

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* no-op if the matrix type does not provide MatSetHashTableFactor_C */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* type-specific implementation registered under "MatSetHashTableFactor_C" */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqBAIJ parts of an MPIBAIJ matrix,
   plus the local-to-global block-column map (colmap) of the off-diagonal part.
   All outputs are optional (pass NULL to skip); the caller does not own them. */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes:
  This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr =
PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the bs x bs scratch block used when the input block must be copied */
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of a block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block-row indices are silently ignored */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {   /* block row owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {   /* diagonal (A) part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;          /* negative block-column indices ignored */
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {                                   /* off-diagonal (B) part */
          if (mat->was_assembled) {
            /* B is compacted; translate global block column to local via colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* unseen block column: fall back to global numbering by disassembling B */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {   /* off-process block row: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
         CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.
mat - the matrix 3616 3617 Level: intermediate 3618 3619 Notes: 3620 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3621 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3622 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3623 3624 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3625 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3626 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3627 with column-major ordering within blocks. 3628 3629 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3630 3631 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3632 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3633 @*/ 3634 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3635 { 3636 PetscErrorCode ierr; 3637 3638 PetscFunctionBegin; 3639 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3640 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3641 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3642 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3643 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3644 ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 3645 ierr = MatSetUp(*mat);CHKERRQ(ierr); 3646 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3647 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3648 ierr = 
MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3649 PetscFunctionReturn(0); 3650 } 3651 3652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3653 { 3654 PetscErrorCode ierr; 3655 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3656 PetscInt *indx; 3657 PetscScalar *values; 3658 3659 PetscFunctionBegin; 3660 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3661 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3663 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3664 PetscInt *bindx,rmax=a->rmax,j; 3665 PetscMPIInt rank,size; 3666 3667 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3668 mbs = m/bs; Nbs = N/cbs; 3669 if (n == PETSC_DECIDE) { 3670 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3671 } 3672 nbs = n/cbs; 3673 3674 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3675 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3676 3677 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3678 ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr); 3679 if (rank == size-1) { 3680 /* Check sum(nbs) = Nbs */ 3681 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3682 } 3683 3684 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3685 for (i=0; i<mbs; i++) { 3686 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3687 nnz = nnz/bs; 3688 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3689 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3690 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3691 } 3692 ierr = PetscFree(bindx);CHKERRQ(ierr); 3693 3694 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3695 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3696 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3697 ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 3698 ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3699 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3700 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3701 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3702 } 3703 3704 /* numeric phase */ 3705 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3706 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3707 3708 for (i=0; i<m; i++) { 3709 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3710 Ii = i + rstart; 3711 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3712 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3713 } 3714 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3715 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3716 PetscFunctionReturn(0); 3717 } 3718