1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 15 PetscScalar *va,*vv; 16 Vec vB,vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 21 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 22 23 PetscCall(VecGetArrayWrite(vA,&va)); 24 if (idx) { 25 for (i=0; i<m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 31 PetscCall(PetscMalloc1(m,&idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 33 34 PetscCall(VecGetArrayWrite(v,&vv)); 35 PetscCall(VecGetArrayRead(vB,&vb)); 36 for (i=0; i<m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 43 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 44 } 45 } 46 PetscCall(VecRestoreArrayWrite(vA,&vv)); 47 PetscCall(VecRestoreArrayWrite(vA,&va)); 48 PetscCall(VecRestoreArrayRead(vB,&vb)); 49 PetscCall(PetscFree(idxb)); 50 PetscCall(VecDestroy(&vA)); 51 PetscCall(VecDestroy(&vB)); 52 PetscFunctionReturn(0); 53 } 54 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 59 PetscFunctionBegin; 60 PetscCall(MatStoreValues(aij->A)); 61 PetscCall(MatStoreValues(aij->B)); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 68 69 PetscFunctionBegin; 70 PetscCall(MatRetrieveValues(aij->A)); 71 PetscCall(MatRetrieveValues(aij->B)); 72 PetscFunctionReturn(0); 73 } 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. This is done in a non scalable way since the 79 length of colmap equals the global matrix length. 80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 176 MatScalar value; 177 PetscBool roworiented = baij->roworiented; 178 PetscInt i,j,row,col; 179 PetscInt rstart_orig=mat->rmap->rstart; 180 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 181 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 182 183 /* Some Variables required in the macro */ 184 Mat A = baij->A; 185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 186 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 187 MatScalar *aa =a->a; 188 189 Mat B = baij->B; 190 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 191 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 192 MatScalar *ba =b->a; 193 194 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 195 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 196 MatScalar *ap,*bap; 197 198 PetscFunctionBegin; 199 for (i=0; i<m; i++) { 200 if (im[i] < 0) continue; 201 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 202 if (im[i] >= rstart_orig && im[i] < rend_orig) { 203 row = im[i] - rstart_orig; 204 for (j=0; j<n; j++) { 205 if (in[j] >= cstart_orig && in[j] < cend_orig) { 206 col = in[j] - cstart_orig; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 210 } else if (in[j] < 0) { 211 continue; 212 } else { 213 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 214 if (mat->was_assembled) { 215 if (!baij->colmap) { 216 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 217 } 218 #if defined(PETSC_USE_CTABLE) 219 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 220 col = col - 1; 221 #else 222 col = baij->colmap[in[j]/bs] - 1; 223 #endif 224 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 225 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 226 col = in[j]; 227 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 228 B = baij->B; 229 b = (Mat_SeqBAIJ*)(B)->data; 230 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 231 ba =b->a; 232 } else { 233 PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 234 col += in[j]%bs; 235 } 236 } else col = in[j]; 237 if (roworiented) value = v[i*n+j]; 238 else value = v[i+j*m]; 239 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 240 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 241 } 242 } 243 } else { 244 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 245 if (!baij->donotstash) { 246 mat->assembled = PETSC_FALSE; 247 if (roworiented) { 248 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 249 } else { 250 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 251 } 252 } 253 } 254 } 255 PetscFunctionReturn(0); 256 } 257 258 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 259 { 260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 261 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 262 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 263 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 264 PetscBool roworiented=a->roworiented; 265 const PetscScalar *value = v; 266 MatScalar *ap,*aa = a->a,*bap; 267 268 PetscFunctionBegin; 269 rp = aj + ai[row]; 270 ap = aa + bs2*ai[row]; 271 rmax = imax[row]; 272 nrow = ailen[row]; 273 value = v; 274 low = 0; 275 high = nrow; 276 while (high-low > 7) { 277 t = (low+high)/2; 278 if (rp[t] > col) high = t; 279 else low = t; 280 } 281 for (i=low; i<high; i++) { 282 if (rp[i] > col) break; 283 if (rp[i] == col) { 284 bap = ap + bs2*i; 285 if (roworiented) { 286 if (is == ADD_VALUES) { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=ii; jj<bs2; jj+=bs) { 289 bap[jj] += *value++; 290 } 291 } 292 } else { 293 for (ii=0; ii<bs; ii++) { 294 for (jj=ii; jj<bs2; jj+=bs) { 295 bap[jj] = *value++; 296 } 297 } 298 } 299 } else { 300 if (is == ADD_VALUES) { 301 for (ii=0; ii<bs; ii++,value+=bs) { 302 for (jj=0; jj<bs; jj++) { 303 bap[jj] += value[jj]; 304 } 305 bap += bs; 306 } 307 } else { 308 for (ii=0; ii<bs; ii++,value+=bs) { 309 for (jj=0; jj<bs; jj++) { 310 bap[jj] = value[jj]; 311 } 312 bap += bs; 313 } 314 } 315 } 316 goto noinsert2; 317 } 318 } 319 if (nonew == 1) goto noinsert2; 320 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 321 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 322 N = nrow++ - 1; high++; 323 /* shift up all the later entries in this row */ 324 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 325 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 326 rp[i] = col; 327 bap = ap + bs2*i; 328 if (roworiented) { 329 for (ii=0; ii<bs; ii++) { 330 for (jj=ii; jj<bs2; jj+=bs) { 331 bap[jj] = *value++; 332 } 333 } 334 } else { 335 for (ii=0; ii<bs; ii++) { 336 for (jj=0; jj<bs; jj++) { 337 *bap++ = *value++; 338 } 339 } 340 } 341 noinsert2:; 342 ailen[row] = nrow; 343 PetscFunctionReturn(0); 344 } 345 346 /* 347 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 348 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 349 */ 350 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 351 { 352 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 353 const PetscScalar *value; 354 MatScalar *barray = baij->barray; 355 PetscBool roworiented = baij->roworiented; 356 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 357 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 358 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 359 360 PetscFunctionBegin; 361 if (!barray) { 362 PetscCall(PetscMalloc1(bs2,&barray)); 363 baij->barray = barray; 364 } 365 366 if (roworiented) stepval = (n-1)*bs; 367 else stepval = (m-1)*bs; 368 369 for (i=0; i<m; i++) { 370 if (im[i] < 0) continue; 371 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 372 if (im[i] >= rstart && im[i] < rend) { 373 row = im[i] - rstart; 374 for (j=0; j<n; j++) { 375 /* If NumCol = 1 then a copy is not required */ 376 if ((roworiented) && (n == 1)) { 377 barray = (MatScalar*)v + i*bs2; 378 } else if ((!roworiented) && (m == 1)) { 379 barray = (MatScalar*)v + j*bs2; 380 } else { /* Here a copy is required */ 381 if (roworiented) { 382 value = v + (i*(stepval+bs) + j)*bs; 383 } else { 384 value = v + (j*(stepval+bs) + i)*bs; 385 } 386 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 387 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 388 barray += bs; 389 } 390 barray -= bs2; 391 } 392 393 if (in[j] >= cstart && in[j] < cend) { 394 col = in[j] - cstart; 395 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 396 } else if (in[j] < 0) { 397 continue; 398 } else { 399 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 400 if (mat->was_assembled) { 401 if (!baij->colmap) { 402 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 403 } 404 405 #if defined(PETSC_USE_DEBUG) 406 #if defined(PETSC_USE_CTABLE) 407 { PetscInt data; 408 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 409 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 410 } 411 #else 412 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 #endif 414 #endif 415 #if defined(PETSC_USE_CTABLE) 416 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 417 col = (col - 1)/bs; 418 #else 419 col = (baij->colmap[in[j]] - 1)/bs; 420 #endif 421 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 422 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 423 col = in[j]; 424 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 425 } else col = in[j]; 426 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 427 } 428 } 429 } else { 430 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 431 if (!baij->donotstash) { 432 if (roworiented) { 433 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 434 } else { 435 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 436 } 437 } 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442 443 #define HASH_KEY 0.6180339887 444 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 445 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 446 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 447 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 448 { 449 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 450 PetscBool roworiented = baij->roworiented; 451 PetscInt i,j,row,col; 452 PetscInt rstart_orig=mat->rmap->rstart; 453 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 454 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 455 PetscReal tmp; 456 MatScalar **HD = baij->hd,value; 457 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 458 459 PetscFunctionBegin; 460 for (i=0; i<m; i++) { 461 if (PetscDefined(USE_DEBUG)) { 462 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 463 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 464 } 465 row = im[i]; 466 if (row >= rstart_orig && row < rend_orig) { 467 for (j=0; j<n; j++) { 468 col = in[j]; 469 if (roworiented) value = v[i*n+j]; 470 else value = v[i+j*m]; 471 /* Look up PetscInto the Hash Table */ 472 key = (row/bs)*Nbs+(col/bs)+1; 473 h1 = HASH(size,key,tmp); 474 475 idx = h1; 476 if (PetscDefined(USE_DEBUG)) { 477 insert_ct++; 478 total_ct++; 479 if (HT[idx] != key) { 480 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 481 if (idx == size) { 482 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 483 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 484 } 485 } 486 } else if (HT[idx] != key) { 487 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 488 if (idx == size) { 489 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 490 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 491 } 492 } 493 /* A HASH table entry is found, so insert the values at the correct address */ 494 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 495 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 496 } 497 } else if (!baij->donotstash) { 498 if (roworiented) { 499 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 500 } else { 501 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 502 } 503 } 504 } 505 if (PetscDefined(USE_DEBUG)) { 506 baij->ht_total_ct += total_ct; 507 baij->ht_insert_ct += insert_ct; 508 } 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 513 { 514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 515 PetscBool roworiented = baij->roworiented; 516 PetscInt i,j,ii,jj,row,col; 517 PetscInt rstart=baij->rstartbs; 518 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 519 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 520 PetscReal tmp; 521 MatScalar **HD = baij->hd,*baij_a; 522 const PetscScalar *v_t,*value; 523 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 524 525 PetscFunctionBegin; 526 if (roworiented) stepval = (n-1)*bs; 527 else stepval = (m-1)*bs; 528 529 for (i=0; i<m; i++) { 530 if (PetscDefined(USE_DEBUG)) { 531 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 532 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 533 } 534 row = im[i]; 535 v_t = v + i*nbs2; 536 if (row >= rstart && row < rend) { 537 for (j=0; j<n; j++) { 538 col = in[j]; 539 540 /* Look up into the Hash Table */ 541 key = row*Nbs+col+1; 542 h1 = HASH(size,key,tmp); 543 544 idx = h1; 545 if (PetscDefined(USE_DEBUG)) { 546 total_ct++; 547 insert_ct++; 548 if (HT[idx] != key) { 549 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 550 if (idx == size) { 551 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 552 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 553 } 554 } 555 } else if (HT[idx] != key) { 556 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 557 if (idx == size) { 558 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 559 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 560 } 561 } 562 baij_a = HD[idx]; 563 if (roworiented) { 564 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 565 /* value = v + (i*(stepval+bs)+j)*bs; */ 566 value = v_t; 567 v_t += bs; 568 if (addv == ADD_VALUES) { 569 for (ii=0; ii<bs; ii++,value+=stepval) { 570 for (jj=ii; jj<bs2; jj+=bs) { 571 baij_a[jj] += *value++; 572 } 573 } 574 } else { 575 for (ii=0; ii<bs; ii++,value+=stepval) { 576 for (jj=ii; jj<bs2; jj+=bs) { 577 baij_a[jj] = *value++; 578 } 579 } 580 } 581 } else { 582 value = v + j*(stepval+bs)*bs + i*bs; 583 if (addv == ADD_VALUES) { 584 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 585 for (jj=0; jj<bs; jj++) { 586 baij_a[jj] += *value++; 587 } 588 } 589 } else { 590 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 591 for (jj=0; jj<bs; jj++) { 592 baij_a[jj] = *value++; 593 } 594 } 595 } 596 } 597 } 598 } else { 599 if (!baij->donotstash) { 600 if (roworiented) { 601 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 602 } else { 603 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 604 } 605 } 606 } 607 } 608 if (PetscDefined(USE_DEBUG)) { 609 baij->ht_total_ct += total_ct; 610 baij->ht_insert_ct += insert_ct; 611 } 612 PetscFunctionReturn(0); 613 } 614 615 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* negative row */ 624 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* negative column */ 629 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 633 } else { 634 if (!baij->colmap) { 635 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 660 PetscReal sum = 0.0; 661 MatScalar *v; 662 663 PetscFunctionBegin; 664 if (baij->size == 1) { 665 PetscCall(MatNorm(baij->A,type,nrm)); 666 } else { 667 if (type == NORM_FROBENIUS) { 668 v = amat->a; 669 nz = amat->nz*bs2; 670 for (i=0; i<nz; i++) { 671 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 672 } 673 v = bmat->a; 674 nz = bmat->nz*bs2; 675 for (i=0; i<nz; i++) { 676 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 677 } 678 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *nrm = PetscSqrtReal(*nrm); 680 } else if (type == NORM_1) { /* max column sum */ 681 PetscReal *tmp,*tmp2; 682 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 683 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 684 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 685 v = amat->a; jj = amat->j; 686 for (i=0; i<amat->nz; i++) { 687 for (j=0; j<bs; j++) { 688 col = bs*(cstart + *jj) + j; /* column index */ 689 for (row=0; row<bs; row++) { 690 tmp[col] += PetscAbsScalar(*v); v++; 691 } 692 } 693 jj++; 694 } 695 v = bmat->a; jj = bmat->j; 696 for (i=0; i<bmat->nz; i++) { 697 for (j=0; j<bs; j++) { 698 col = bs*garray[*jj] + j; 699 for (row=0; row<bs; row++) { 700 tmp[col] += PetscAbsScalar(*v); v++; 701 } 702 } 703 jj++; 704 } 705 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 706 *nrm = 0.0; 707 for (j=0; j<mat->cmap->N; j++) { 708 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 709 } 710 PetscCall(PetscFree(tmp)); 711 PetscCall(PetscFree(tmp2)); 712 } else if (type == NORM_INFINITY) { /* max row sum */ 713 PetscReal *sums; 714 PetscCall(PetscMalloc1(bs,&sums)); 715 sum = 0.0; 716 for (j=0; j<amat->mbs; j++) { 717 for (row=0; row<bs; row++) sums[row] = 0.0; 718 v = amat->a + bs2*amat->i[j]; 719 nz = amat->i[j+1]-amat->i[j]; 720 for (i=0; i<nz; i++) { 721 for (col=0; col<bs; col++) { 722 for (row=0; row<bs; row++) { 723 sums[row] += PetscAbsScalar(*v); v++; 724 } 725 } 726 } 727 v = bmat->a + bs2*bmat->i[j]; 728 nz = bmat->i[j+1]-bmat->i[j]; 729 for (i=0; i<nz; i++) { 730 for (col=0; col<bs; col++) { 731 for (row=0; row<bs; row++) { 732 sums[row] += PetscAbsScalar(*v); v++; 733 } 734 } 735 } 736 for (row=0; row<bs; row++) { 737 if (sums[row] > sum) sum = sums[row]; 738 } 739 } 740 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 741 PetscCall(PetscFree(sums)); 742 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 Creates the hash table, and sets the table 749 This table is created only once. 750 If new entried need to be added to the matrix 751 then the hash table has to be destroyed and 752 recreated. 753 */ 754 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 755 { 756 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 757 Mat A = baij->A,B=baij->B; 758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 759 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 760 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 761 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 762 PetscInt *HT,key; 763 MatScalar **HD; 764 PetscReal tmp; 765 #if defined(PETSC_USE_INFO) 766 PetscInt ct=0,max=0; 767 #endif 768 769 PetscFunctionBegin; 770 if (baij->ht) PetscFunctionReturn(0); 771 772 baij->ht_size = (PetscInt)(factor*nz); 773 ht_size = baij->ht_size; 774 775 /* Allocate Memory for Hash Table */ 776 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 777 HD = baij->hd; 778 HT = baij->ht; 779 780 /* Loop Over A */ 781 for (i=0; i<a->mbs; i++) { 782 for (j=ai[i]; j<ai[i+1]; j++) { 783 row = i+rstart; 784 col = aj[j]+cstart; 785 786 key = row*Nbs + col + 1; 787 h1 = HASH(ht_size,key,tmp); 788 for (k=0; k<ht_size; k++) { 789 if (!HT[(h1+k)%ht_size]) { 790 HT[(h1+k)%ht_size] = key; 791 HD[(h1+k)%ht_size] = a->a + j*bs2; 792 break; 793 #if defined(PETSC_USE_INFO) 794 } else { 795 ct++; 796 #endif 797 } 798 } 799 #if defined(PETSC_USE_INFO) 800 if (k> max) max = k; 801 #endif 802 } 803 } 804 /* Loop Over B */ 805 for (i=0; i<b->mbs; i++) { 806 for (j=bi[i]; j<bi[i+1]; j++) { 807 row = i+rstart; 808 col = garray[bj[j]]; 809 key = row*Nbs + col + 1; 810 h1 = HASH(ht_size,key,tmp); 811 for (k=0; k<ht_size; k++) { 812 if (!HT[(h1+k)%ht_size]) { 813 HT[(h1+k)%ht_size] = key; 814 HD[(h1+k)%ht_size] = b->a + j*bs2; 815 break; 816 #if defined(PETSC_USE_INFO) 817 } else { 818 ct++; 819 #endif 820 } 821 } 822 #if defined(PETSC_USE_INFO) 823 if (k> max) max = k; 824 #endif 825 } 826 } 827 828 /* Print Summary */ 829 #if defined(PETSC_USE_INFO) 830 for (i=0,j=0; i<ht_size; i++) { 831 if (HT[i]) j++; 832 } 833 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 834 #endif 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 839 { 840 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 841 PetscInt nstash,reallocs; 842 843 PetscFunctionBegin; 844 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 845 846 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 847 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 848 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 849 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 850 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 851 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 856 { 857 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 858 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 859 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 860 PetscInt *row,*col; 861 PetscBool r1,r2,r3,other_disassembled; 862 MatScalar *val; 863 PetscMPIInt n; 864 865 PetscFunctionBegin; 866 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { 875 if (row[j] != rstart) break; 876 } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 /* Now assemble all these values with a single function call */ 880 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 881 i = j; 882 } 883 } 884 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 885 /* Now process the block-stash. Since the values are stashed column-oriented, 886 set the roworiented flag to column oriented, and after MatSetValues() 887 restore the original flags */ 888 r1 = baij->roworiented; 889 r2 = a->roworiented; 890 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 891 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 895 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 896 while (1) { 897 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 898 if (!flg) break; 899 900 for (i=0; i<n;) { 901 /* Now identify the consecutive vals belonging to the same row */ 902 for (j=i,rstart=row[j]; j<n; j++) { 903 if (row[j] != rstart) break; 904 } 905 if (j < n) ncols = j-i; 906 else ncols = n-i; 907 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 908 i = j; 909 } 910 } 911 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 912 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 PetscCall(MatAssemblyBegin(baij->A,mode)); 920 PetscCall(MatAssemblyEnd(baij->A,mode)); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselves, in order that we may reassemble. */ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 930 if (mat->was_assembled && !other_disassembled) { 931 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 937 } 938 PetscCall(MatAssemblyBegin(baij->B,mode)); 939 PetscCall(MatAssemblyEnd(baij->B,mode)); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 944 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 951 952 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 953 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 954 } 955 956 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 957 958 baij->rowvalues = NULL; 959 960 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 961 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 962 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 963 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 969 #include <petscdraw.h> 970 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 973 PetscMPIInt rank = baij->rank; 974 PetscInt bs = mat->rmap->bs; 975 PetscBool iascii,isdraw; 976 PetscViewer sviewer; 977 PetscViewerFormat format; 978 979 PetscFunctionBegin; 980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 981 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 982 if (iascii) { 983 PetscCall(PetscViewerGetFormat(viewer,&format)); 984 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 985 MatInfo info; 986 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 987 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 989 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 990 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 991 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 992 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 993 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 994 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 995 PetscCall(PetscViewerFlush(viewer)); 996 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 997 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 998 PetscCall(VecScatterView(baij->Mvctx,viewer)); 999 PetscFunctionReturn(0); 1000 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1001 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1002 PetscFunctionReturn(0); 1003 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1004 PetscFunctionReturn(0); 1005 } 1006 } 1007 1008 if (isdraw) { 1009 PetscDraw draw; 1010 PetscBool isnull; 1011 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1012 PetscCall(PetscDrawIsNull(draw,&isnull)); 1013 if (isnull) PetscFunctionReturn(0); 1014 } 1015 1016 { 1017 /* assemble the entire matrix onto first processor. */ 1018 Mat A; 1019 Mat_SeqBAIJ *Aloc; 1020 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1021 MatScalar *a; 1022 const char *matname; 1023 1024 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1025 /* Perhaps this should be the type of mat? */ 1026 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1027 if (rank == 0) { 1028 PetscCall(MatSetSizes(A,M,N,M,N)); 1029 } else { 1030 PetscCall(MatSetSizes(A,0,0,M,N)); 1031 } 1032 PetscCall(MatSetType(A,MATMPIBAIJ)); 1033 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1034 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1035 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1036 1037 /* copy over the A part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 PetscCall(PetscMalloc1(bs,&rvals)); 1041 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = (baij->cstartbs+aj[j])*bs; 1047 for (k=0; k<bs; k++) { 1048 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 /* copy over the B part */ 1054 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1055 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1056 for (i=0; i<mbs; i++) { 1057 rvals[0] = bs*(baij->rstartbs + i); 1058 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1059 for (j=ai[i]; j<ai[i+1]; j++) { 1060 col = baij->garray[aj[j]]*bs; 1061 for (k=0; k<bs; k++) { 1062 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1063 col++; a += bs; 1064 } 1065 } 1066 } 1067 PetscCall(PetscFree(rvals)); 1068 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1069 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1070 /* 1071 Everyone has to call to draw the matrix since the graphics waits are 1072 synchronized across all processors that share the PetscDraw object 1073 */ 1074 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1075 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1076 if (rank == 0) { 1077 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1078 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1079 } 1080 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1081 PetscCall(PetscViewerFlush(viewer)); 1082 PetscCall(MatDestroy(&A)); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1088 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1089 { 1090 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1091 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1092 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1093 const PetscInt *garray = aij->garray; 1094 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1095 PetscInt *rowlens,*colidxs; 1096 PetscScalar *matvals; 1097 1098 PetscFunctionBegin; 1099 PetscCall(PetscViewerSetUp(viewer)); 1100 1101 M = mat->rmap->N; 1102 N = mat->cmap->N; 1103 m = mat->rmap->n; 1104 rs = mat->rmap->rstart; 1105 cs = mat->cmap->rstart; 1106 bs = mat->rmap->bs; 1107 nz = bs*bs*(A->nz + B->nz); 1108 1109 /* write matrix header */ 1110 header[0] = MAT_FILE_CLASSID; 1111 header[1] = M; header[2] = N; header[3] = nz; 1112 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1113 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1114 1115 /* fill in and store row lengths */ 1116 PetscCall(PetscMalloc1(m,&rowlens)); 1117 for (cnt=0, i=0; i<A->mbs; i++) 1118 for (j=0; j<bs; j++) 1119 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1120 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1121 PetscCall(PetscFree(rowlens)); 1122 1123 /* fill in and store column indices */ 1124 PetscCall(PetscMalloc1(nz,&colidxs)); 1125 for (cnt=0, i=0; i<A->mbs; i++) { 1126 for (k=0; k<bs; k++) { 1127 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1128 if (garray[B->j[jb]] > cs/bs) break; 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1131 } 1132 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1133 for (l=0; l<bs; l++) 1134 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1135 for (; jb<B->i[i+1]; jb++) 1136 for (l=0; l<bs; l++) 1137 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1138 } 1139 } 1140 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1141 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1142 PetscCall(PetscFree(colidxs)); 1143 1144 /* fill in and store nonzero values */ 1145 PetscCall(PetscMalloc1(nz,&matvals)); 1146 for (cnt=0, i=0; i<A->mbs; i++) { 1147 for (k=0; k<bs; k++) { 1148 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1149 if (garray[B->j[jb]] > cs/bs) break; 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1152 } 1153 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1154 for (l=0; l<bs; l++) 1155 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1156 for (; jb<B->i[i+1]; jb++) 1157 for (l=0; l<bs; l++) 1158 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1159 } 1160 } 1161 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1162 PetscCall(PetscFree(matvals)); 1163 1164 /* write block size option to the viewer's .info file */ 1165 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscBool iascii,isdraw,issocket,isbinary; 1172 1173 PetscFunctionBegin; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1175 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1176 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1177 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1178 if (iascii || isdraw || issocket) { 1179 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1180 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1185 { 1186 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1187 1188 PetscFunctionBegin; 1189 #if defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1191 #endif 1192 PetscCall(MatStashDestroy_Private(&mat->stash)); 1193 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1194 PetscCall(MatDestroy(&baij->A)); 1195 PetscCall(MatDestroy(&baij->B)); 1196 #if defined(PETSC_USE_CTABLE) 1197 PetscCall(PetscTableDestroy(&baij->colmap)); 1198 #else 1199 PetscCall(PetscFree(baij->colmap)); 1200 #endif 1201 PetscCall(PetscFree(baij->garray)); 1202 PetscCall(VecDestroy(&baij->lvec)); 1203 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1204 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1205 PetscCall(PetscFree(baij->barray)); 1206 PetscCall(PetscFree2(baij->hd,baij->ht)); 1207 PetscCall(PetscFree(baij->rangebs)); 1208 PetscCall(PetscFree(mat->data)); 1209 1210 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1217 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL)); 1219 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL)); 1220 #if defined(PETSC_HAVE_HYPRE) 1221 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1222 #endif 1223 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx,&nt)); 1234 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy,&nt)); 1236 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1263 /* do local part */ 1264 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1294 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1301 1302 PetscFunctionBegin; 1303 PetscCall(MatScale(a->A,aa)); 1304 PetscCall(MatScale(a->B,aa)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1313 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1314 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1315 1316 PetscFunctionBegin; 1317 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1318 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1326 PetscInt max = 1,mbs = mat->mbs,tmp; 1327 for (i=0; i<mbs; i++) { 1328 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1329 if (max < tmp) max = tmp; 1330 } 1331 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1332 } 1333 lrow = row - brstart; 1334 1335 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1336 if (!v) {pvA = NULL; pvB = NULL;} 1337 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1338 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1339 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1340 nztot = nzA + nzB; 1341 1342 cmap = mat->garray; 1343 if (v || idx) { 1344 if (nztot) { 1345 /* Sort by increasing column numbers, assuming A and B already sorted */ 1346 PetscInt imark = -1; 1347 if (v) { 1348 *v = v_p = mat->rowvalues; 1349 for (i=0; i<nzB; i++) { 1350 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1351 else break; 1352 } 1353 imark = i; 1354 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1355 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1356 } 1357 if (idx) { 1358 *idx = idx_p = mat->rowindices; 1359 if (imark > -1) { 1360 for (i=0; i<imark; i++) { 1361 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1362 } 1363 } else { 1364 for (i=0; i<nzB; i++) { 1365 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1366 else break; 1367 } 1368 imark = i; 1369 } 1370 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1371 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1372 } 1373 } else { 1374 if (idx) *idx = NULL; 1375 if (v) *v = NULL; 1376 } 1377 } 1378 *nz = nztot; 1379 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1380 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1387 1388 PetscFunctionBegin; 1389 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1390 baij->getrowactive = PETSC_FALSE; 1391 PetscFunctionReturn(0); 1392 } 1393 1394 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatZeroEntries(l->A)); 1400 PetscCall(MatZeroEntries(l->B)); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1407 Mat A = a->A,B = a->B; 1408 PetscLogDouble isend[5],irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (PetscReal)matin->rmap->bs; 1412 1413 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1414 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1416 isend[3] = info->memory; isend[4] = info->mallocs; 1417 1418 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1419 1420 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1421 isend[3] += info->memory; isend[4] += info->mallocs; 1422 1423 if (flag == MAT_LOCAL) { 1424 info->nz_used = isend[0]; 1425 info->nz_allocated = isend[1]; 1426 info->nz_unneeded = isend[2]; 1427 info->memory = isend[3]; 1428 info->mallocs = isend[4]; 1429 } else if (flag == MAT_GLOBAL_MAX) { 1430 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1431 1432 info->nz_used = irecv[0]; 1433 info->nz_allocated = irecv[1]; 1434 info->nz_unneeded = irecv[2]; 1435 info->memory = irecv[3]; 1436 info->mallocs = irecv[4]; 1437 } else if (flag == MAT_GLOBAL_SUM) { 1438 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1446 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1447 info->fill_ratio_needed = 0; 1448 info->factor_mallocs = 0; 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1453 { 1454 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1455 1456 PetscFunctionBegin; 1457 switch (op) { 1458 case MAT_NEW_NONZERO_LOCATIONS: 1459 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1460 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1461 case MAT_KEEP_NONZERO_PATTERN: 1462 case MAT_NEW_NONZERO_LOCATION_ERR: 1463 MatCheckPreallocated(A,1); 1464 PetscCall(MatSetOption(a->A,op,flg)); 1465 PetscCall(MatSetOption(a->B,op,flg)); 1466 break; 1467 case MAT_ROW_ORIENTED: 1468 MatCheckPreallocated(A,1); 1469 a->roworiented = flg; 1470 1471 PetscCall(MatSetOption(a->A,op,flg)); 1472 PetscCall(MatSetOption(a->B,op,flg)); 1473 break; 1474 case MAT_FORCE_DIAGONAL_ENTRIES: 1475 case MAT_SORTED_FULL: 1476 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1477 break; 1478 case MAT_IGNORE_OFF_PROC_ENTRIES: 1479 a->donotstash = flg; 1480 break; 1481 case MAT_USE_HASH_TABLE: 1482 a->ht_flag = flg; 1483 a->ht_fact = 1.39; 1484 break; 1485 case MAT_SYMMETRIC: 1486 case MAT_STRUCTURALLY_SYMMETRIC: 1487 case MAT_HERMITIAN: 1488 case MAT_SUBMAT_SINGLEIS: 1489 case MAT_SYMMETRY_ETERNAL: 1490 MatCheckPreallocated(A,1); 1491 PetscCall(MatSetOption(a->A,op,flg)); 1492 break; 1493 default: 1494 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1500 { 1501 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1502 Mat_SeqBAIJ *Aloc; 1503 Mat B; 1504 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1505 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1506 MatScalar *a; 1507 1508 PetscFunctionBegin; 1509 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1510 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1511 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1512 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1513 /* Do not know preallocation information, but must set block size */ 1514 PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL)); 1515 } else { 1516 B = *matout; 1517 } 1518 1519 /* copy over the A part */ 1520 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1521 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1522 PetscCall(PetscMalloc1(bs,&rvals)); 1523 1524 for (i=0; i<mbs; i++) { 1525 rvals[0] = bs*(baij->rstartbs + i); 1526 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1527 for (j=ai[i]; j<ai[i+1]; j++) { 1528 col = (baij->cstartbs+aj[j])*bs; 1529 for (k=0; k<bs; k++) { 1530 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1531 1532 col++; a += bs; 1533 } 1534 } 1535 } 1536 /* copy over the B part */ 1537 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1538 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1539 for (i=0; i<mbs; i++) { 1540 rvals[0] = bs*(baij->rstartbs + i); 1541 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1542 for (j=ai[i]; j<ai[i+1]; j++) { 1543 col = baij->garray[aj[j]]*bs; 1544 for (k=0; k<bs; k++) { 1545 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1546 col++; 1547 a += bs; 1548 } 1549 } 1550 } 1551 PetscCall(PetscFree(rvals)); 1552 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1553 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1554 1555 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1556 else { 1557 PetscCall(MatHeaderMerge(A,&B)); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1563 { 1564 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1565 Mat a = baij->A,b = baij->B; 1566 PetscInt s1,s2,s3; 1567 1568 PetscFunctionBegin; 1569 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1570 if (rr) { 1571 PetscCall(VecGetLocalSize(rr,&s1)); 1572 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1573 /* Overlap communication with computation. */ 1574 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1575 } 1576 if (ll) { 1577 PetscCall(VecGetLocalSize(ll,&s1)); 1578 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1579 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1580 } 1581 /* scale the diagonal block */ 1582 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1583 1584 if (rr) { 1585 /* Do a scatter end and then right scale the off-diagonal block */ 1586 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1587 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1593 { 1594 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1595 PetscInt *lrows; 1596 PetscInt r, len; 1597 PetscBool cong; 1598 1599 PetscFunctionBegin; 1600 /* get locally owned rows */ 1601 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 1602 /* fix right hand side if needed */ 1603 if (x && b) { 1604 const PetscScalar *xx; 1605 PetscScalar *bb; 1606 1607 PetscCall(VecGetArrayRead(x,&xx)); 1608 PetscCall(VecGetArray(b,&bb)); 1609 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1610 PetscCall(VecRestoreArrayRead(x,&xx)); 1611 PetscCall(VecRestoreArray(b,&bb)); 1612 } 1613 1614 /* actually zap the local rows */ 1615 /* 1616 Zero the required rows. If the "diagonal block" of the matrix 1617 is square and the user wishes to set the diagonal we use separate 1618 code so that MatSetValues() is not called for each diagonal allocating 1619 new memory, thus calling lots of mallocs and slowing things down. 1620 1621 */ 1622 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1623 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1624 PetscCall(MatHasCongruentLayouts(A,&cong)); 1625 if ((diag != 0.0) && cong) { 1626 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1627 } else if (diag != 0.0) { 1628 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1629 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1630 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1631 for (r = 0; r < len; ++r) { 1632 const PetscInt row = lrows[r] + A->rmap->rstart; 1633 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1634 } 1635 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1636 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1637 } else { 1638 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1639 } 1640 PetscCall(PetscFree(lrows)); 1641 1642 /* only change matrix nonzero state if pattern was allowed to be changed */ 1643 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1644 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1645 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1651 { 1652 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1653 PetscMPIInt n = A->rmap->n,p = 0; 1654 PetscInt i,j,k,r,len = 0,row,col,count; 1655 PetscInt *lrows,*owners = A->rmap->range; 1656 PetscSFNode *rrows; 1657 PetscSF sf; 1658 const PetscScalar *xx; 1659 PetscScalar *bb,*mask; 1660 Vec xmask,lmask; 1661 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1662 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1663 PetscScalar *aa; 1664 1665 PetscFunctionBegin; 1666 /* Create SF where leaves are input rows and roots are owned rows */ 1667 PetscCall(PetscMalloc1(n, &lrows)); 1668 for (r = 0; r < n; ++r) lrows[r] = -1; 1669 PetscCall(PetscMalloc1(N, &rrows)); 1670 for (r = 0; r < N; ++r) { 1671 const PetscInt idx = rows[r]; 1672 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1673 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1674 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1675 } 1676 rrows[r].rank = p; 1677 rrows[r].index = rows[r] - owners[p]; 1678 } 1679 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1680 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1681 /* Collect flags for rows to be zeroed */ 1682 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1683 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1684 PetscCall(PetscSFDestroy(&sf)); 1685 /* Compress and put in row numbers */ 1686 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1687 /* zero diagonal part of matrix */ 1688 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1689 /* handle off diagonal part of matrix */ 1690 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1691 PetscCall(VecDuplicate(l->lvec,&lmask)); 1692 PetscCall(VecGetArray(xmask,&bb)); 1693 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1694 PetscCall(VecRestoreArray(xmask,&bb)); 1695 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1696 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1697 PetscCall(VecDestroy(&xmask)); 1698 if (x) { 1699 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1700 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1701 PetscCall(VecGetArrayRead(l->lvec,&xx)); 1702 PetscCall(VecGetArray(b,&bb)); 1703 } 1704 PetscCall(VecGetArray(lmask,&mask)); 1705 /* remove zeroed rows of off diagonal matrix */ 1706 for (i = 0; i < len; ++i) { 1707 row = lrows[i]; 1708 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1709 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1710 for (k = 0; k < count; ++k) { 1711 aa[0] = 0.0; 1712 aa += bs; 1713 } 1714 } 1715 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1716 for (i = 0; i < l->B->rmap->N; ++i) { 1717 row = i/bs; 1718 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1719 for (k = 0; k < bs; ++k) { 1720 col = bs*baij->j[j] + k; 1721 if (PetscAbsScalar(mask[col])) { 1722 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1723 if (x) bb[i] -= aa[0]*xx[col]; 1724 aa[0] = 0.0; 1725 } 1726 } 1727 } 1728 } 1729 if (x) { 1730 PetscCall(VecRestoreArray(b,&bb)); 1731 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 1732 } 1733 PetscCall(VecRestoreArray(lmask,&mask)); 1734 PetscCall(VecDestroy(&lmask)); 1735 PetscCall(PetscFree(lrows)); 1736 1737 /* only change matrix nonzero state if pattern was allowed to be changed */ 1738 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1739 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1740 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1746 { 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1748 1749 PetscFunctionBegin; 1750 PetscCall(MatSetUnfactored(a->A)); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1755 1756 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1757 { 1758 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1759 Mat a,b,c,d; 1760 PetscBool flg; 1761 1762 PetscFunctionBegin; 1763 a = matA->A; b = matA->B; 1764 c = matB->A; d = matB->B; 1765 1766 PetscCall(MatEqual(a,c,&flg)); 1767 if (flg) { 1768 PetscCall(MatEqual(b,d,&flg)); 1769 } 1770 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1775 { 1776 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1777 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1778 1779 PetscFunctionBegin; 1780 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1781 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1782 PetscCall(MatCopy_Basic(A,B,str)); 1783 } else { 1784 PetscCall(MatCopy(a->A,b->A,str)); 1785 PetscCall(MatCopy(a->B,b->B,str)); 1786 } 1787 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1792 { 1793 PetscFunctionBegin; 1794 PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1799 { 1800 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1801 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1802 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1803 1804 PetscFunctionBegin; 1805 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1810 { 1811 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1812 PetscBLASInt bnz,one=1; 1813 Mat_SeqBAIJ *x,*y; 1814 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1815 1816 PetscFunctionBegin; 1817 if (str == SAME_NONZERO_PATTERN) { 1818 PetscScalar alpha = a; 1819 x = (Mat_SeqBAIJ*)xx->A->data; 1820 y = (Mat_SeqBAIJ*)yy->A->data; 1821 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1822 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1823 x = (Mat_SeqBAIJ*)xx->B->data; 1824 y = (Mat_SeqBAIJ*)yy->B->data; 1825 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1827 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1828 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1829 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1830 } else { 1831 Mat B; 1832 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1833 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1834 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1835 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1836 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1837 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1838 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1839 PetscCall(MatSetType(B,MATMPIBAIJ)); 1840 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d)); 1841 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1842 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1843 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1844 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1845 PetscCall(MatHeaderMerge(Y,&B)); 1846 PetscCall(PetscFree(nnz_d)); 1847 PetscCall(PetscFree(nnz_o)); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1853 { 1854 PetscFunctionBegin; 1855 if (PetscDefined(USE_COMPLEX)) { 1856 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1857 1858 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1859 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1865 { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1867 1868 PetscFunctionBegin; 1869 PetscCall(MatRealPart(a->A)); 1870 PetscCall(MatRealPart(a->B)); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1875 { 1876 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1877 1878 PetscFunctionBegin; 1879 PetscCall(MatImaginaryPart(a->A)); 1880 PetscCall(MatImaginaryPart(a->B)); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1885 { 1886 IS iscol_local; 1887 PetscInt csize; 1888 1889 PetscFunctionBegin; 1890 PetscCall(ISGetLocalSize(iscol,&csize)); 1891 if (call == MAT_REUSE_MATRIX) { 1892 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1893 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1894 } else { 1895 PetscCall(ISAllGather(iscol,&iscol_local)); 1896 } 1897 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1898 if (call == MAT_INITIAL_MATRIX) { 1899 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1900 PetscCall(ISDestroy(&iscol_local)); 1901 } 1902 PetscFunctionReturn(0); 1903 } 1904 1905 /* 1906 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1907 in local and then by concatenating the local matrices the end result. 1908 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1909 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1910 */ 1911 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1912 { 1913 PetscMPIInt rank,size; 1914 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1915 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1916 Mat M,Mreuse; 1917 MatScalar *vwork,*aa; 1918 MPI_Comm comm; 1919 IS isrow_new, iscol_new; 1920 Mat_SeqBAIJ *aij; 1921 1922 PetscFunctionBegin; 1923 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 1924 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1925 PetscCallMPI(MPI_Comm_size(comm,&size)); 1926 /* The compression and expansion should be avoided. Doesn't point 1927 out errors, might change the indices, hence buggey */ 1928 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1929 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1930 1931 if (call == MAT_REUSE_MATRIX) { 1932 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1933 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1934 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1935 } else { 1936 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1937 } 1938 PetscCall(ISDestroy(&isrow_new)); 1939 PetscCall(ISDestroy(&iscol_new)); 1940 /* 1941 m - number of local rows 1942 n - number of columns (same on all processors) 1943 rstart - first row in new global matrix generated 1944 */ 1945 PetscCall(MatGetBlockSize(mat,&bs)); 1946 PetscCall(MatGetSize(Mreuse,&m,&n)); 1947 m = m/bs; 1948 n = n/bs; 1949 1950 if (call == MAT_INITIAL_MATRIX) { 1951 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1952 ii = aij->i; 1953 jj = aij->j; 1954 1955 /* 1956 Determine the number of non-zeros in the diagonal and off-diagonal 1957 portions of the matrix in order to do correct preallocation 1958 */ 1959 1960 /* first get start and end of "diagonal" columns */ 1961 if (csize == PETSC_DECIDE) { 1962 PetscCall(ISGetSize(isrow,&mglobal)); 1963 if (mglobal == n*bs) { /* square matrix */ 1964 nlocal = m; 1965 } else { 1966 nlocal = n/size + ((n % size) > rank); 1967 } 1968 } else { 1969 nlocal = csize/bs; 1970 } 1971 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1972 rstart = rend - nlocal; 1973 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1974 1975 /* next, compute all the lengths */ 1976 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1977 for (i=0; i<m; i++) { 1978 jend = ii[i+1] - ii[i]; 1979 olen = 0; 1980 dlen = 0; 1981 for (j=0; j<jend; j++) { 1982 if (*jj < rstart || *jj >= rend) olen++; 1983 else dlen++; 1984 jj++; 1985 } 1986 olens[i] = olen; 1987 dlens[i] = dlen; 1988 } 1989 PetscCall(MatCreate(comm,&M)); 1990 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1991 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1992 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1993 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1994 PetscCall(PetscFree2(dlens,olens)); 1995 } else { 1996 PetscInt ml,nl; 1997 1998 M = *newmat; 1999 PetscCall(MatGetLocalSize(M,&ml,&nl)); 2000 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2001 PetscCall(MatZeroEntries(M)); 2002 /* 2003 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2004 rather than the slower MatSetValues(). 2005 */ 2006 M->was_assembled = PETSC_TRUE; 2007 M->assembled = PETSC_FALSE; 2008 } 2009 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2010 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2011 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2012 ii = aij->i; 2013 jj = aij->j; 2014 aa = aij->a; 2015 for (i=0; i<m; i++) { 2016 row = rstart/bs + i; 2017 nz = ii[i+1] - ii[i]; 2018 cwork = jj; jj += nz; 2019 vwork = aa; aa += nz*bs*bs; 2020 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2021 } 2022 2023 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2024 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2025 *newmat = M; 2026 2027 /* save submatrix used in processor for next request */ 2028 if (call == MAT_INITIAL_MATRIX) { 2029 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2030 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2036 { 2037 MPI_Comm comm,pcomm; 2038 PetscInt clocal_size,nrows; 2039 const PetscInt *rows; 2040 PetscMPIInt size; 2041 IS crowp,lcolp; 2042 2043 PetscFunctionBegin; 2044 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2045 /* make a collective version of 'rowp' */ 2046 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2047 if (pcomm==comm) { 2048 crowp = rowp; 2049 } else { 2050 PetscCall(ISGetSize(rowp,&nrows)); 2051 PetscCall(ISGetIndices(rowp,&rows)); 2052 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2053 PetscCall(ISRestoreIndices(rowp,&rows)); 2054 } 2055 PetscCall(ISSetPermutation(crowp)); 2056 /* make a local version of 'colp' */ 2057 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2058 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2059 if (size==1) { 2060 lcolp = colp; 2061 } else { 2062 PetscCall(ISAllGather(colp,&lcolp)); 2063 } 2064 PetscCall(ISSetPermutation(lcolp)); 2065 /* now we just get the submatrix */ 2066 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2067 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2068 /* clean up */ 2069 if (pcomm!=comm) { 2070 PetscCall(ISDestroy(&crowp)); 2071 } 2072 if (size>1) { 2073 PetscCall(ISDestroy(&lcolp)); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2079 { 2080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2081 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2082 2083 PetscFunctionBegin; 2084 if (nghosts) *nghosts = B->nbs; 2085 if (ghosts) *ghosts = baij->garray; 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2090 { 2091 Mat B; 2092 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2093 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2094 Mat_SeqAIJ *b; 2095 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2096 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2097 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2098 2099 PetscFunctionBegin; 2100 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2101 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2102 2103 /* ---------------------------------------------------------------- 2104 Tell every processor the number of nonzeros per row 2105 */ 2106 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2107 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2108 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2109 } 2110 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2111 displs = recvcounts + size; 2112 for (i=0; i<size; i++) { 2113 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2114 displs[i] = A->rmap->range[i]/bs; 2115 } 2116 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2117 /* --------------------------------------------------------------- 2118 Create the sequential matrix of the same type as the local block diagonal 2119 */ 2120 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2121 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2122 PetscCall(MatSetType(B,MATSEQAIJ)); 2123 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2124 b = (Mat_SeqAIJ*)B->data; 2125 2126 /*-------------------------------------------------------------------- 2127 Copy my part of matrix column indices over 2128 */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2134 cnt = 0; 2135 for (i=0; i<n; i++) { 2136 2137 /* put in lower diagonal portion */ 2138 m = bd->i[i+1] - bd->i[i]; 2139 while (m > 0) { 2140 /* is it above diagonal (in bd (compressed) numbering) */ 2141 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2142 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2143 m--; 2144 } 2145 2146 /* put in diagonal portion */ 2147 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2148 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2149 } 2150 2151 /* put in upper diagonal portion */ 2152 while (m-- > 0) { 2153 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2154 } 2155 } 2156 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2157 2158 /*-------------------------------------------------------------------- 2159 Gather all column indices to all processors 2160 */ 2161 for (i=0; i<size; i++) { 2162 recvcounts[i] = 0; 2163 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2164 recvcounts[i] += lens[j]; 2165 } 2166 } 2167 displs[0] = 0; 2168 for (i=1; i<size; i++) { 2169 displs[i] = displs[i-1] + recvcounts[i-1]; 2170 } 2171 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2172 /*-------------------------------------------------------------------- 2173 Assemble the matrix into useable form (note numerical values not yet set) 2174 */ 2175 /* set the b->ilen (length of each row) values */ 2176 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2177 /* set the b->i indices */ 2178 b->i[0] = 0; 2179 for (i=1; i<=A->rmap->N/bs; i++) { 2180 b->i[i] = b->i[i-1] + lens[i-1]; 2181 } 2182 PetscCall(PetscFree(lens)); 2183 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2184 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2185 PetscCall(PetscFree(recvcounts)); 2186 2187 if (A->symmetric) PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 2188 else if (A->hermitian) PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE)); 2189 else if (A->structurally_symmetric) PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 2190 *newmat = B; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2195 { 2196 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2197 Vec bb1 = NULL; 2198 2199 PetscFunctionBegin; 2200 if (flag == SOR_APPLY_UPPER) { 2201 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2206 PetscCall(VecDuplicate(bb,&bb1)); 2207 } 2208 2209 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2210 if (flag & SOR_ZERO_INITIAL_GUESS) { 2211 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2212 its--; 2213 } 2214 2215 while (its--) { 2216 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2217 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2218 2219 /* update rhs: bb1 = bb - B*x */ 2220 PetscCall(VecScale(mat->lvec,-1.0)); 2221 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2222 2223 /* local sweep */ 2224 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2225 } 2226 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2227 if (flag & SOR_ZERO_INITIAL_GUESS) { 2228 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2229 its--; 2230 } 2231 while (its--) { 2232 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2233 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2234 2235 /* update rhs: bb1 = bb - B*x */ 2236 PetscCall(VecScale(mat->lvec,-1.0)); 2237 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2238 2239 /* local sweep */ 2240 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2241 } 2242 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2243 if (flag & SOR_ZERO_INITIAL_GUESS) { 2244 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2245 its--; 2246 } 2247 while (its--) { 2248 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2249 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2250 2251 /* update rhs: bb1 = bb - B*x */ 2252 PetscCall(VecScale(mat->lvec,-1.0)); 2253 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2254 2255 /* local sweep */ 2256 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2257 } 2258 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2259 2260 PetscCall(VecDestroy(&bb1)); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2265 { 2266 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2267 PetscInt m,N,i,*garray = aij->garray; 2268 PetscInt ib,jb,bs = A->rmap->bs; 2269 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2270 MatScalar *a_val = a_aij->a; 2271 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2272 MatScalar *b_val = b_aij->a; 2273 PetscReal *work; 2274 2275 PetscFunctionBegin; 2276 PetscCall(MatGetSize(A,&m,&N)); 2277 PetscCall(PetscCalloc1(N,&work)); 2278 if (type == NORM_2) { 2279 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2280 for (jb=0; jb<bs; jb++) { 2281 for (ib=0; ib<bs; ib++) { 2282 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2283 a_val++; 2284 } 2285 } 2286 } 2287 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2288 for (jb=0; jb<bs; jb++) { 2289 for (ib=0; ib<bs; ib++) { 2290 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2291 b_val++; 2292 } 2293 } 2294 } 2295 } else if (type == NORM_1) { 2296 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2297 for (jb=0; jb<bs; jb++) { 2298 for (ib=0; ib<bs; ib++) { 2299 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2300 a_val++; 2301 } 2302 } 2303 } 2304 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2305 for (jb=0; jb<bs; jb++) { 2306 for (ib=0; ib<bs; ib++) { 2307 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == NORM_INFINITY) { 2313 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2314 for (jb=0; jb<bs; jb++) { 2315 for (ib=0; ib<bs; ib++) { 2316 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2317 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2318 a_val++; 2319 } 2320 } 2321 } 2322 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2323 for (jb=0; jb<bs; jb++) { 2324 for (ib=0; ib<bs; ib++) { 2325 int col = garray[b_aij->j[i]] * bs + jb; 2326 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2327 b_val++; 2328 } 2329 } 2330 } 2331 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2332 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2333 for (jb=0; jb<bs; jb++) { 2334 for (ib=0; ib<bs; ib++) { 2335 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2336 a_val++; 2337 } 2338 } 2339 } 2340 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2341 for (jb=0; jb<bs; jb++) { 2342 for (ib=0; ib<bs; ib++) { 2343 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2344 b_val++; 2345 } 2346 } 2347 } 2348 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2349 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2350 for (jb=0; jb<bs; jb++) { 2351 for (ib=0; ib<bs; ib++) { 2352 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2353 a_val++; 2354 } 2355 } 2356 } 2357 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2358 for (jb=0; jb<bs; jb++) { 2359 for (ib=0; ib<bs; ib++) { 2360 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2361 b_val++; 2362 } 2363 } 2364 } 2365 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2366 if (type == NORM_INFINITY) { 2367 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2368 } else { 2369 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2370 } 2371 PetscCall(PetscFree(work)); 2372 if (type == NORM_2) { 2373 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2374 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2375 for (i=0; i<N; i++) reductions[i] /= m; 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2381 { 2382 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2383 2384 PetscFunctionBegin; 2385 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2386 A->factorerrortype = a->A->factorerrortype; 2387 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2388 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2393 { 2394 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2395 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2396 2397 PetscFunctionBegin; 2398 if (!Y->preallocated) { 2399 PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2400 } else if (!aij->nz) { 2401 PetscInt nonew = aij->nonew; 2402 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2403 aij->nonew = nonew; 2404 } 2405 PetscCall(MatShift_Basic(Y,a)); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2410 { 2411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2412 2413 PetscFunctionBegin; 2414 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2415 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2416 if (d) { 2417 PetscInt rstart; 2418 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2419 *d += rstart/A->rmap->bs; 2420 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2426 { 2427 PetscFunctionBegin; 2428 *a = ((Mat_MPIBAIJ*)A->data)->A; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* -------------------------------------------------------------------*/ 2433 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2434 MatGetRow_MPIBAIJ, 2435 MatRestoreRow_MPIBAIJ, 2436 MatMult_MPIBAIJ, 2437 /* 4*/ MatMultAdd_MPIBAIJ, 2438 MatMultTranspose_MPIBAIJ, 2439 MatMultTransposeAdd_MPIBAIJ, 2440 NULL, 2441 NULL, 2442 NULL, 2443 /*10*/ NULL, 2444 NULL, 2445 NULL, 2446 MatSOR_MPIBAIJ, 2447 MatTranspose_MPIBAIJ, 2448 /*15*/ MatGetInfo_MPIBAIJ, 2449 MatEqual_MPIBAIJ, 2450 MatGetDiagonal_MPIBAIJ, 2451 MatDiagonalScale_MPIBAIJ, 2452 MatNorm_MPIBAIJ, 2453 /*20*/ MatAssemblyBegin_MPIBAIJ, 2454 MatAssemblyEnd_MPIBAIJ, 2455 MatSetOption_MPIBAIJ, 2456 MatZeroEntries_MPIBAIJ, 2457 /*24*/ MatZeroRows_MPIBAIJ, 2458 NULL, 2459 NULL, 2460 NULL, 2461 NULL, 2462 /*29*/ MatSetUp_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 MatGetDiagonalBlock_MPIBAIJ, 2466 NULL, 2467 /*34*/ MatDuplicate_MPIBAIJ, 2468 NULL, 2469 NULL, 2470 NULL, 2471 NULL, 2472 /*39*/ MatAXPY_MPIBAIJ, 2473 MatCreateSubMatrices_MPIBAIJ, 2474 MatIncreaseOverlap_MPIBAIJ, 2475 MatGetValues_MPIBAIJ, 2476 MatCopy_MPIBAIJ, 2477 /*44*/ NULL, 2478 MatScale_MPIBAIJ, 2479 MatShift_MPIBAIJ, 2480 NULL, 2481 MatZeroRowsColumns_MPIBAIJ, 2482 /*49*/ NULL, 2483 NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 /*54*/ MatFDColoringCreate_MPIXAIJ, 2488 NULL, 2489 MatSetUnfactored_MPIBAIJ, 2490 MatPermute_MPIBAIJ, 2491 MatSetValuesBlocked_MPIBAIJ, 2492 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2493 MatDestroy_MPIBAIJ, 2494 MatView_MPIBAIJ, 2495 NULL, 2496 NULL, 2497 /*64*/ NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2503 NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*74*/ NULL, 2508 MatFDColoringApply_BAIJ, 2509 NULL, 2510 NULL, 2511 NULL, 2512 /*79*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 MatLoad_MPIBAIJ, 2517 /*84*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*89*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*94*/ NULL, 2528 NULL, 2529 NULL, 2530 NULL, 2531 NULL, 2532 /*99*/ NULL, 2533 NULL, 2534 NULL, 2535 MatConjugate_MPIBAIJ, 2536 NULL, 2537 /*104*/NULL, 2538 MatRealPart_MPIBAIJ, 2539 MatImaginaryPart_MPIBAIJ, 2540 NULL, 2541 NULL, 2542 /*109*/NULL, 2543 NULL, 2544 NULL, 2545 NULL, 2546 MatMissingDiagonal_MPIBAIJ, 2547 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2548 NULL, 2549 MatGetGhosts_MPIBAIJ, 2550 NULL, 2551 NULL, 2552 /*119*/NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 MatGetMultiProcBlock_MPIBAIJ, 2557 /*124*/NULL, 2558 MatGetColumnReductions_MPIBAIJ, 2559 MatInvertBlockDiagonal_MPIBAIJ, 2560 NULL, 2561 NULL, 2562 /*129*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*134*/ NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*139*/ MatSetBlockSizes_Default, 2573 NULL, 2574 NULL, 2575 MatFDColoringSetUp_MPIXAIJ, 2576 NULL, 2577 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2578 NULL, 2579 NULL, 2580 NULL 2581 }; 2582 2583 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2584 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2585 2586 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2587 { 2588 PetscInt m,rstart,cstart,cend; 2589 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2590 const PetscInt *JJ =NULL; 2591 PetscScalar *values=NULL; 2592 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2593 PetscBool nooffprocentries; 2594 2595 PetscFunctionBegin; 2596 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2597 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2598 PetscCall(PetscLayoutSetUp(B->rmap)); 2599 PetscCall(PetscLayoutSetUp(B->cmap)); 2600 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2601 m = B->rmap->n/bs; 2602 rstart = B->rmap->rstart/bs; 2603 cstart = B->cmap->rstart/bs; 2604 cend = B->cmap->rend/bs; 2605 2606 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2607 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2608 for (i=0; i<m; i++) { 2609 nz = ii[i+1] - ii[i]; 2610 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2611 nz_max = PetscMax(nz_max,nz); 2612 dlen = 0; 2613 olen = 0; 2614 JJ = jj + ii[i]; 2615 for (j=0; j<nz; j++) { 2616 if (*JJ < cstart || *JJ >= cend) olen++; 2617 else dlen++; 2618 JJ++; 2619 } 2620 d_nnz[i] = dlen; 2621 o_nnz[i] = olen; 2622 } 2623 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2624 PetscCall(PetscFree2(d_nnz,o_nnz)); 2625 2626 values = (PetscScalar*)V; 2627 if (!values) { 2628 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2629 } 2630 for (i=0; i<m; i++) { 2631 PetscInt row = i + rstart; 2632 PetscInt ncols = ii[i+1] - ii[i]; 2633 const PetscInt *icols = jj + ii[i]; 2634 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2635 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2636 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2637 } else { /* block ordering does not match so we can only insert one block at a time. */ 2638 PetscInt j; 2639 for (j=0; j<ncols; j++) { 2640 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2641 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2642 } 2643 } 2644 } 2645 2646 if (!V) PetscCall(PetscFree(values)); 2647 nooffprocentries = B->nooffprocentries; 2648 B->nooffprocentries = PETSC_TRUE; 2649 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2650 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2651 B->nooffprocentries = nooffprocentries; 2652 2653 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 /*@C 2658 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2659 2660 Collective 2661 2662 Input Parameters: 2663 + B - the matrix 2664 . bs - the block size 2665 . i - the indices into j for the start of each local row (starts with zero) 2666 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2667 - v - optional values in the matrix 2668 2669 Level: advanced 2670 2671 Notes: 2672 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2673 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2674 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2675 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2676 block column and the second index is over columns within a block. 2677 2678 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2679 2680 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2681 @*/ 2682 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2683 { 2684 PetscFunctionBegin; 2685 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2686 PetscValidType(B,1); 2687 PetscValidLogicalCollectiveInt(B,bs,2); 2688 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2693 { 2694 Mat_MPIBAIJ *b; 2695 PetscInt i; 2696 PetscMPIInt size; 2697 2698 PetscFunctionBegin; 2699 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2700 PetscCall(PetscLayoutSetUp(B->rmap)); 2701 PetscCall(PetscLayoutSetUp(B->cmap)); 2702 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2703 2704 if (d_nnz) { 2705 for (i=0; i<B->rmap->n/bs; i++) { 2706 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2707 } 2708 } 2709 if (o_nnz) { 2710 for (i=0; i<B->rmap->n/bs; i++) { 2711 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2712 } 2713 } 2714 2715 b = (Mat_MPIBAIJ*)B->data; 2716 b->bs2 = bs*bs; 2717 b->mbs = B->rmap->n/bs; 2718 b->nbs = B->cmap->n/bs; 2719 b->Mbs = B->rmap->N/bs; 2720 b->Nbs = B->cmap->N/bs; 2721 2722 for (i=0; i<=b->size; i++) { 2723 b->rangebs[i] = B->rmap->range[i]/bs; 2724 } 2725 b->rstartbs = B->rmap->rstart/bs; 2726 b->rendbs = B->rmap->rend/bs; 2727 b->cstartbs = B->cmap->rstart/bs; 2728 b->cendbs = B->cmap->rend/bs; 2729 2730 #if defined(PETSC_USE_CTABLE) 2731 PetscCall(PetscTableDestroy(&b->colmap)); 2732 #else 2733 PetscCall(PetscFree(b->colmap)); 2734 #endif 2735 PetscCall(PetscFree(b->garray)); 2736 PetscCall(VecDestroy(&b->lvec)); 2737 PetscCall(VecScatterDestroy(&b->Mvctx)); 2738 2739 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2740 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2741 PetscCall(MatDestroy(&b->B)); 2742 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2743 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2744 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2745 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2746 2747 if (!B->preallocated) { 2748 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2749 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2750 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2751 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2752 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2753 } 2754 2755 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2756 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2757 B->preallocated = PETSC_TRUE; 2758 B->was_assembled = PETSC_FALSE; 2759 B->assembled = PETSC_FALSE; 2760 PetscFunctionReturn(0); 2761 } 2762 2763 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2764 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2765 2766 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2767 { 2768 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2769 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2770 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2771 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2772 2773 PetscFunctionBegin; 2774 PetscCall(PetscMalloc1(M+1,&ii)); 2775 ii[0] = 0; 2776 for (i=0; i<M; i++) { 2777 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2778 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2779 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2780 /* remove one from count of matrix has diagonal */ 2781 for (j=id[i]; j<id[i+1]; j++) { 2782 if (jd[j] == i) {ii[i+1]--;break;} 2783 } 2784 } 2785 PetscCall(PetscMalloc1(ii[M],&jj)); 2786 cnt = 0; 2787 for (i=0; i<M; i++) { 2788 for (j=io[i]; j<io[i+1]; j++) { 2789 if (garray[jo[j]] > rstart) break; 2790 jj[cnt++] = garray[jo[j]]; 2791 } 2792 for (k=id[i]; k<id[i+1]; k++) { 2793 if (jd[k] != i) { 2794 jj[cnt++] = rstart + jd[k]; 2795 } 2796 } 2797 for (; j<io[i+1]; j++) { 2798 jj[cnt++] = garray[jo[j]]; 2799 } 2800 } 2801 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2802 PetscFunctionReturn(0); 2803 } 2804 2805 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2806 2807 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2808 2809 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2810 { 2811 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2812 Mat_MPIAIJ *b; 2813 Mat B; 2814 2815 PetscFunctionBegin; 2816 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2817 2818 if (reuse == MAT_REUSE_MATRIX) { 2819 B = *newmat; 2820 } else { 2821 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2822 PetscCall(MatSetType(B,MATMPIAIJ)); 2823 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2824 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2825 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2826 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2827 } 2828 b = (Mat_MPIAIJ*) B->data; 2829 2830 if (reuse == MAT_REUSE_MATRIX) { 2831 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2833 } else { 2834 PetscCall(MatDestroy(&b->A)); 2835 PetscCall(MatDestroy(&b->B)); 2836 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2837 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2838 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2839 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2840 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2841 } 2842 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2843 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2844 2845 if (reuse == MAT_INPLACE_MATRIX) { 2846 PetscCall(MatHeaderReplace(A,&B)); 2847 } else { 2848 *newmat = B; 2849 } 2850 PetscFunctionReturn(0); 2851 } 2852 2853 /*MC 2854 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2855 2856 Options Database Keys: 2857 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2858 . -mat_block_size <bs> - set the blocksize used to store the matrix 2859 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2860 - -mat_use_hash_table <fact> - set hash table factor 2861 2862 Level: beginner 2863 2864 Notes: 2865 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 2866 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2867 2868 .seealso: `MatCreateBAIJ` 2869 M*/ 2870 2871 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2872 2873 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2874 { 2875 Mat_MPIBAIJ *b; 2876 PetscBool flg = PETSC_FALSE; 2877 2878 PetscFunctionBegin; 2879 PetscCall(PetscNewLog(B,&b)); 2880 B->data = (void*)b; 2881 2882 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2883 B->assembled = PETSC_FALSE; 2884 2885 B->insertmode = NOT_SET_VALUES; 2886 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2887 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2888 2889 /* build local table of row and column ownerships */ 2890 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2891 2892 /* build cache for off array entries formed */ 2893 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2894 2895 b->donotstash = PETSC_FALSE; 2896 b->colmap = NULL; 2897 b->garray = NULL; 2898 b->roworiented = PETSC_TRUE; 2899 2900 /* stuff used in block assembly */ 2901 b->barray = NULL; 2902 2903 /* stuff used for matrix vector multiply */ 2904 b->lvec = NULL; 2905 b->Mvctx = NULL; 2906 2907 /* stuff for MatGetRow() */ 2908 b->rowindices = NULL; 2909 b->rowvalues = NULL; 2910 b->getrowactive = PETSC_FALSE; 2911 2912 /* hash table stuff */ 2913 b->ht = NULL; 2914 b->hd = NULL; 2915 b->ht_size = 0; 2916 b->ht_flag = PETSC_FALSE; 2917 b->ht_fact = 0; 2918 b->ht_total_ct = 0; 2919 b->ht_insert_ct = 0; 2920 2921 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2922 b->ijonly = PETSC_FALSE; 2923 2924 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2925 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2927 #if defined(PETSC_HAVE_HYPRE) 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2929 #endif 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2937 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2938 2939 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2940 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2941 if (flg) { 2942 PetscReal fact = 1.39; 2943 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2944 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2945 if (fact <= 1.0) fact = 1.39; 2946 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2947 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2948 } 2949 PetscOptionsEnd(); 2950 PetscFunctionReturn(0); 2951 } 2952 2953 /*MC 2954 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2955 2956 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2957 and MATMPIBAIJ otherwise. 2958 2959 Options Database Keys: 2960 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2961 2962 Level: beginner 2963 2964 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2965 M*/ 2966 2967 /*@C 2968 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2969 (block compressed row). For good matrix assembly performance 2970 the user should preallocate the matrix storage by setting the parameters 2971 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2972 performance can be increased by more than a factor of 50. 2973 2974 Collective on Mat 2975 2976 Input Parameters: 2977 + B - the matrix 2978 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2979 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2980 . d_nz - number of block nonzeros per block row in diagonal portion of local 2981 submatrix (same for all local rows) 2982 . d_nnz - array containing the number of block nonzeros in the various block rows 2983 of the in diagonal portion of the local (possibly different for each block 2984 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2985 set it even if it is zero. 2986 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2987 submatrix (same for all local rows). 2988 - o_nnz - array containing the number of nonzeros in the various block rows of the 2989 off-diagonal portion of the local submatrix (possibly different for 2990 each block row) or NULL. 2991 2992 If the *_nnz parameter is given then the *_nz parameter is ignored 2993 2994 Options Database Keys: 2995 + -mat_block_size - size of the blocks to use 2996 - -mat_use_hash_table <fact> - set hash table factor 2997 2998 Notes: 2999 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3000 than it must be used on all processors that share the object for that argument. 3001 3002 Storage Information: 3003 For a square global matrix we define each processor's diagonal portion 3004 to be its local rows and the corresponding columns (a square submatrix); 3005 each processor's off-diagonal portion encompasses the remainder of the 3006 local matrix (a rectangular submatrix). 3007 3008 The user can specify preallocated storage for the diagonal part of 3009 the local submatrix with either d_nz or d_nnz (not both). Set 3010 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3011 memory allocation. Likewise, specify preallocated storage for the 3012 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3013 3014 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3015 the figure below we depict these three local rows and all columns (0-11). 3016 3017 .vb 3018 0 1 2 3 4 5 6 7 8 9 10 11 3019 -------------------------- 3020 row 3 |o o o d d d o o o o o o 3021 row 4 |o o o d d d o o o o o o 3022 row 5 |o o o d d d o o o o o o 3023 -------------------------- 3024 .ve 3025 3026 Thus, any entries in the d locations are stored in the d (diagonal) 3027 submatrix, and any entries in the o locations are stored in the 3028 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3029 stored simply in the MATSEQBAIJ format for compressed row storage. 3030 3031 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3032 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3033 In general, for PDE problems in which most nonzeros are near the diagonal, 3034 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3035 or you will get TERRIBLE performance; see the users' manual chapter on 3036 matrices. 3037 3038 You can call MatGetInfo() to get information on how effective the preallocation was; 3039 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3040 You can also run with the option -info and look for messages with the string 3041 malloc in them to see if additional memory allocation was needed. 3042 3043 Level: intermediate 3044 3045 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3046 @*/ 3047 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3048 { 3049 PetscFunctionBegin; 3050 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3051 PetscValidType(B,1); 3052 PetscValidLogicalCollectiveInt(B,bs,2); 3053 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 /*@C 3058 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3059 (block compressed row). For good matrix assembly performance 3060 the user should preallocate the matrix storage by setting the parameters 3061 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3062 performance can be increased by more than a factor of 50. 3063 3064 Collective 3065 3066 Input Parameters: 3067 + comm - MPI communicator 3068 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3069 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3070 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3071 This value should be the same as the local size used in creating the 3072 y vector for the matrix-vector product y = Ax. 3073 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3074 This value should be the same as the local size used in creating the 3075 x vector for the matrix-vector product y = Ax. 3076 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3077 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3078 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3079 submatrix (same for all local rows) 3080 . d_nnz - array containing the number of nonzero blocks in the various block rows 3081 of the in diagonal portion of the local (possibly different for each block 3082 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3083 and set it even if it is zero. 3084 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3085 submatrix (same for all local rows). 3086 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3087 off-diagonal portion of the local submatrix (possibly different for 3088 each block row) or NULL. 3089 3090 Output Parameter: 3091 . A - the matrix 3092 3093 Options Database Keys: 3094 + -mat_block_size - size of the blocks to use 3095 - -mat_use_hash_table <fact> - set hash table factor 3096 3097 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3098 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3099 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3100 3101 Notes: 3102 If the *_nnz parameter is given then the *_nz parameter is ignored 3103 3104 A nonzero block is any block that as 1 or more nonzeros in it 3105 3106 The user MUST specify either the local or global matrix dimensions 3107 (possibly both). 3108 3109 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3110 than it must be used on all processors that share the object for that argument. 3111 3112 Storage Information: 3113 For a square global matrix we define each processor's diagonal portion 3114 to be its local rows and the corresponding columns (a square submatrix); 3115 each processor's off-diagonal portion encompasses the remainder of the 3116 local matrix (a rectangular submatrix). 3117 3118 The user can specify preallocated storage for the diagonal part of 3119 the local submatrix with either d_nz or d_nnz (not both). Set 3120 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3121 memory allocation. Likewise, specify preallocated storage for the 3122 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3123 3124 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3125 the figure below we depict these three local rows and all columns (0-11). 3126 3127 .vb 3128 0 1 2 3 4 5 6 7 8 9 10 11 3129 -------------------------- 3130 row 3 |o o o d d d o o o o o o 3131 row 4 |o o o d d d o o o o o o 3132 row 5 |o o o d d d o o o o o o 3133 -------------------------- 3134 .ve 3135 3136 Thus, any entries in the d locations are stored in the d (diagonal) 3137 submatrix, and any entries in the o locations are stored in the 3138 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3139 stored simply in the MATSEQBAIJ format for compressed row storage. 3140 3141 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3142 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3143 In general, for PDE problems in which most nonzeros are near the diagonal, 3144 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3145 or you will get TERRIBLE performance; see the users' manual chapter on 3146 matrices. 3147 3148 Level: intermediate 3149 3150 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3151 @*/ 3152 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3153 { 3154 PetscMPIInt size; 3155 3156 PetscFunctionBegin; 3157 PetscCall(MatCreate(comm,A)); 3158 PetscCall(MatSetSizes(*A,m,n,M,N)); 3159 PetscCallMPI(MPI_Comm_size(comm,&size)); 3160 if (size > 1) { 3161 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3162 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3163 } else { 3164 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3165 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3166 } 3167 PetscFunctionReturn(0); 3168 } 3169 3170 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3171 { 3172 Mat mat; 3173 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3174 PetscInt len=0; 3175 3176 PetscFunctionBegin; 3177 *newmat = NULL; 3178 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3179 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3180 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3181 3182 mat->factortype = matin->factortype; 3183 mat->preallocated = PETSC_TRUE; 3184 mat->assembled = PETSC_TRUE; 3185 mat->insertmode = NOT_SET_VALUES; 3186 3187 a = (Mat_MPIBAIJ*)mat->data; 3188 mat->rmap->bs = matin->rmap->bs; 3189 a->bs2 = oldmat->bs2; 3190 a->mbs = oldmat->mbs; 3191 a->nbs = oldmat->nbs; 3192 a->Mbs = oldmat->Mbs; 3193 a->Nbs = oldmat->Nbs; 3194 3195 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3196 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3197 3198 a->size = oldmat->size; 3199 a->rank = oldmat->rank; 3200 a->donotstash = oldmat->donotstash; 3201 a->roworiented = oldmat->roworiented; 3202 a->rowindices = NULL; 3203 a->rowvalues = NULL; 3204 a->getrowactive = PETSC_FALSE; 3205 a->barray = NULL; 3206 a->rstartbs = oldmat->rstartbs; 3207 a->rendbs = oldmat->rendbs; 3208 a->cstartbs = oldmat->cstartbs; 3209 a->cendbs = oldmat->cendbs; 3210 3211 /* hash table stuff */ 3212 a->ht = NULL; 3213 a->hd = NULL; 3214 a->ht_size = 0; 3215 a->ht_flag = oldmat->ht_flag; 3216 a->ht_fact = oldmat->ht_fact; 3217 a->ht_total_ct = 0; 3218 a->ht_insert_ct = 0; 3219 3220 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3221 if (oldmat->colmap) { 3222 #if defined(PETSC_USE_CTABLE) 3223 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3224 #else 3225 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3226 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3227 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3228 #endif 3229 } else a->colmap = NULL; 3230 3231 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3232 PetscCall(PetscMalloc1(len,&a->garray)); 3233 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3234 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3235 } else a->garray = NULL; 3236 3237 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3238 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3239 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3240 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3241 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3242 3243 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3244 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3245 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3246 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3247 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3248 *newmat = mat; 3249 PetscFunctionReturn(0); 3250 } 3251 3252 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3253 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3254 { 3255 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3256 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3257 PetscScalar *matvals; 3258 3259 PetscFunctionBegin; 3260 PetscCall(PetscViewerSetUp(viewer)); 3261 3262 /* read in matrix header */ 3263 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3264 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3265 M = header[1]; N = header[2]; nz = header[3]; 3266 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3267 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3268 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3269 3270 /* set block sizes from the viewer's .info file */ 3271 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3272 /* set local sizes if not set already */ 3273 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3274 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3275 /* set global sizes if not set already */ 3276 if (mat->rmap->N < 0) mat->rmap->N = M; 3277 if (mat->cmap->N < 0) mat->cmap->N = N; 3278 PetscCall(PetscLayoutSetUp(mat->rmap)); 3279 PetscCall(PetscLayoutSetUp(mat->cmap)); 3280 3281 /* check if the matrix sizes are correct */ 3282 PetscCall(MatGetSize(mat,&rows,&cols)); 3283 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3284 PetscCall(MatGetBlockSize(mat,&bs)); 3285 PetscCall(MatGetLocalSize(mat,&m,&n)); 3286 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3287 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3288 mbs = m/bs; nbs = n/bs; 3289 3290 /* read in row lengths and build row indices */ 3291 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3292 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3293 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3294 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3295 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3296 3297 /* read in column indices and matrix values */ 3298 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3299 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3300 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3301 3302 { /* preallocate matrix storage */ 3303 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3304 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3305 PetscBool sbaij,done; 3306 PetscInt *d_nnz,*o_nnz; 3307 3308 PetscCall(PetscBTCreate(nbs,&bt)); 3309 PetscCall(PetscHSetICreate(&ht)); 3310 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3311 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3312 for (i=0; i<mbs; i++) { 3313 PetscCall(PetscBTMemzero(nbs,bt)); 3314 PetscCall(PetscHSetIClear(ht)); 3315 for (k=0; k<bs; k++) { 3316 PetscInt row = bs*i + k; 3317 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3318 PetscInt col = colidxs[j]; 3319 if (!sbaij || col >= row) { 3320 if (col >= cs && col < ce) { 3321 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3322 } else { 3323 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3324 if (done) o_nnz[i]++; 3325 } 3326 } 3327 } 3328 } 3329 } 3330 PetscCall(PetscBTDestroy(&bt)); 3331 PetscCall(PetscHSetIDestroy(&ht)); 3332 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3333 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3334 PetscCall(PetscFree2(d_nnz,o_nnz)); 3335 } 3336 3337 /* store matrix values */ 3338 for (i=0; i<m; i++) { 3339 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3340 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3341 } 3342 3343 PetscCall(PetscFree(rowidxs)); 3344 PetscCall(PetscFree2(colidxs,matvals)); 3345 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3346 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3347 PetscFunctionReturn(0); 3348 } 3349 3350 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3351 { 3352 PetscBool isbinary; 3353 3354 PetscFunctionBegin; 3355 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3356 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3357 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3358 PetscFunctionReturn(0); 3359 } 3360 3361 /*@ 3362 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3363 3364 Input Parameters: 3365 + mat - the matrix 3366 - fact - factor 3367 3368 Not Collective, each process can use a different factor 3369 3370 Level: advanced 3371 3372 Notes: 3373 This can also be set by the command line option: -mat_use_hash_table <fact> 3374 3375 .seealso: `MatSetOption()` 3376 @*/ 3377 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3378 { 3379 PetscFunctionBegin; 3380 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3381 PetscFunctionReturn(0); 3382 } 3383 3384 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3385 { 3386 Mat_MPIBAIJ *baij; 3387 3388 PetscFunctionBegin; 3389 baij = (Mat_MPIBAIJ*)mat->data; 3390 baij->ht_fact = fact; 3391 PetscFunctionReturn(0); 3392 } 3393 3394 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3395 { 3396 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3397 PetscBool flg; 3398 3399 PetscFunctionBegin; 3400 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3401 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3402 if (Ad) *Ad = a->A; 3403 if (Ao) *Ao = a->B; 3404 if (colmap) *colmap = a->garray; 3405 PetscFunctionReturn(0); 3406 } 3407 3408 /* 3409 Special version for direct calls from Fortran (to eliminate two function call overheads 3410 */ 3411 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3412 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3413 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3414 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3415 #endif 3416 3417 /*@C 3418 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3419 3420 Collective on Mat 3421 3422 Input Parameters: 3423 + mat - the matrix 3424 . min - number of input rows 3425 . im - input rows 3426 . nin - number of input columns 3427 . in - input columns 3428 . v - numerical values input 3429 - addvin - INSERT_VALUES or ADD_VALUES 3430 3431 Notes: 3432 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3433 3434 Level: advanced 3435 3436 .seealso: `MatSetValuesBlocked()` 3437 @*/ 3438 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3439 { 3440 /* convert input arguments to C version */ 3441 Mat mat = *matin; 3442 PetscInt m = *min, n = *nin; 3443 InsertMode addv = *addvin; 3444 3445 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3446 const MatScalar *value; 3447 MatScalar *barray = baij->barray; 3448 PetscBool roworiented = baij->roworiented; 3449 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3450 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3451 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3452 3453 PetscFunctionBegin; 3454 /* tasks normally handled by MatSetValuesBlocked() */ 3455 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3456 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3457 PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3458 if (mat->assembled) { 3459 mat->was_assembled = PETSC_TRUE; 3460 mat->assembled = PETSC_FALSE; 3461 } 3462 PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0)); 3463 3464 if (!barray) { 3465 PetscCall(PetscMalloc1(bs2,&barray)); 3466 baij->barray = barray; 3467 } 3468 3469 if (roworiented) stepval = (n-1)*bs; 3470 else stepval = (m-1)*bs; 3471 3472 for (i=0; i<m; i++) { 3473 if (im[i] < 0) continue; 3474 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3475 if (im[i] >= rstart && im[i] < rend) { 3476 row = im[i] - rstart; 3477 for (j=0; j<n; j++) { 3478 /* If NumCol = 1 then a copy is not required */ 3479 if ((roworiented) && (n == 1)) { 3480 barray = (MatScalar*)v + i*bs2; 3481 } else if ((!roworiented) && (m == 1)) { 3482 barray = (MatScalar*)v + j*bs2; 3483 } else { /* Here a copy is required */ 3484 if (roworiented) { 3485 value = v + i*(stepval+bs)*bs + j*bs; 3486 } else { 3487 value = v + j*(stepval+bs)*bs + i*bs; 3488 } 3489 for (ii=0; ii<bs; ii++,value+=stepval) { 3490 for (jj=0; jj<bs; jj++) { 3491 *barray++ = *value++; 3492 } 3493 } 3494 barray -=bs2; 3495 } 3496 3497 if (in[j] >= cstart && in[j] < cend) { 3498 col = in[j] - cstart; 3499 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 3500 } else if (in[j] < 0) { 3501 continue; 3502 } else { 3503 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3504 if (mat->was_assembled) { 3505 if (!baij->colmap) { 3506 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3507 } 3508 3509 #if defined(PETSC_USE_DEBUG) 3510 #if defined(PETSC_USE_CTABLE) 3511 { PetscInt data; 3512 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 3513 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3514 } 3515 #else 3516 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3517 #endif 3518 #endif 3519 #if defined(PETSC_USE_CTABLE) 3520 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 3521 col = (col - 1)/bs; 3522 #else 3523 col = (baij->colmap[in[j]] - 1)/bs; 3524 #endif 3525 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3526 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3527 col = in[j]; 3528 } 3529 } else col = in[j]; 3530 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3531 } 3532 } 3533 } else { 3534 if (!baij->donotstash) { 3535 if (roworiented) { 3536 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3537 } else { 3538 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3539 } 3540 } 3541 } 3542 } 3543 3544 /* task normally handled by MatSetValuesBlocked() */ 3545 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3546 PetscFunctionReturn(0); 3547 } 3548 3549 /*@ 3550 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3551 CSR format the local rows. 3552 3553 Collective 3554 3555 Input Parameters: 3556 + comm - MPI communicator 3557 . bs - the block size, only a block size of 1 is supported 3558 . m - number of local rows (Cannot be PETSC_DECIDE) 3559 . n - This value should be the same as the local size used in creating the 3560 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3561 calculated if N is given) For square matrices n is almost always m. 3562 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3563 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3564 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3565 . j - column indices 3566 - a - matrix values 3567 3568 Output Parameter: 3569 . mat - the matrix 3570 3571 Level: intermediate 3572 3573 Notes: 3574 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3575 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3576 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3577 3578 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3579 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3580 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3581 with column-major ordering within blocks. 3582 3583 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3584 3585 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3586 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3587 @*/ 3588 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3589 { 3590 PetscFunctionBegin; 3591 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3592 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3593 PetscCall(MatCreate(comm,mat)); 3594 PetscCall(MatSetSizes(*mat,m,n,M,N)); 3595 PetscCall(MatSetType(*mat,MATMPIBAIJ)); 3596 PetscCall(MatSetBlockSize(*mat,bs)); 3597 PetscCall(MatSetUp(*mat)); 3598 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 3599 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 3600 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 3601 PetscFunctionReturn(0); 3602 } 3603 3604 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3605 { 3606 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3607 PetscInt *indx; 3608 PetscScalar *values; 3609 3610 PetscFunctionBegin; 3611 PetscCall(MatGetSize(inmat,&m,&N)); 3612 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3613 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3614 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3615 PetscInt *bindx,rmax=a->rmax,j; 3616 PetscMPIInt rank,size; 3617 3618 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3619 mbs = m/bs; Nbs = N/cbs; 3620 if (n == PETSC_DECIDE) { 3621 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 3622 } 3623 nbs = n/cbs; 3624 3625 PetscCall(PetscMalloc1(rmax,&bindx)); 3626 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 3627 3628 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3629 PetscCallMPI(MPI_Comm_rank(comm,&size)); 3630 if (rank == size-1) { 3631 /* Check sum(nbs) = Nbs */ 3632 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3633 } 3634 3635 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3636 for (i=0; i<mbs; i++) { 3637 PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 3638 nnz = nnz/bs; 3639 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3640 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 3641 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 3642 } 3643 PetscCall(PetscFree(bindx)); 3644 3645 PetscCall(MatCreate(comm,outmat)); 3646 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 3647 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 3648 PetscCall(MatSetType(*outmat,MATBAIJ)); 3649 PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz)); 3650 PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 3651 MatPreallocateEnd(dnz,onz); 3652 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3653 } 3654 3655 /* numeric phase */ 3656 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3657 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 3658 3659 for (i=0; i<m; i++) { 3660 PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3661 Ii = i + rstart; 3662 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 3663 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3664 } 3665 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3666 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3667 PetscFunctionReturn(0); 3668 } 3669