1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 15 PetscScalar *va,*vv; 16 Vec vB,vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 21 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 22 23 PetscCall(VecGetArrayWrite(vA,&va)); 24 if (idx) { 25 for (i=0; i<m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 31 PetscCall(PetscMalloc1(m,&idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 33 34 PetscCall(VecGetArrayWrite(v,&vv)); 35 PetscCall(VecGetArrayRead(vB,&vb)); 36 for (i=0; i<m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 43 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 44 } 45 } 46 PetscCall(VecRestoreArrayWrite(vA,&vv)); 47 PetscCall(VecRestoreArrayWrite(vA,&va)); 48 PetscCall(VecRestoreArrayRead(vB,&vb)); 49 PetscCall(PetscFree(idxb)); 50 PetscCall(VecDestroy(&vA)); 51 PetscCall(VecDestroy(&vB)); 52 PetscFunctionReturn(0); 53 } 54 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 59 PetscFunctionBegin; 60 PetscCall(MatStoreValues(aij->A)); 61 PetscCall(MatStoreValues(aij->B)); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = 
(Mat_MPIBAIJ*)mat->data; 68 69 PetscFunctionBegin; 70 PetscCall(MatRetrieveValues(aij->A)); 71 PetscCall(MatRetrieveValues(aij->B)); 72 PetscFunctionReturn(0); 73 } 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. This is done in a non scalable way since the 79 length of colmap equals the global matrix length. 80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") 
into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode 
addv) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 176 MatScalar value; 177 PetscBool roworiented = baij->roworiented; 178 PetscInt i,j,row,col; 179 PetscInt rstart_orig=mat->rmap->rstart; 180 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 181 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 182 183 /* Some Variables required in the macro */ 184 Mat A = baij->A; 185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 186 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 187 MatScalar *aa =a->a; 188 189 Mat B = baij->B; 190 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 191 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 192 MatScalar *ba =b->a; 193 194 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 195 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 196 MatScalar *ap,*bap; 197 198 PetscFunctionBegin; 199 for (i=0; i<m; i++) { 200 if (im[i] < 0) continue; 201 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 202 if (im[i] >= rstart_orig && im[i] < rend_orig) { 203 row = im[i] - rstart_orig; 204 for (j=0; j<n; j++) { 205 if (in[j] >= cstart_orig && in[j] < cend_orig) { 206 col = in[j] - cstart_orig; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 210 } else if (in[j] < 0) continue; 211 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 212 else { 213 if (mat->was_assembled) { 214 if (!baij->colmap) { 215 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 216 } 217 #if defined(PETSC_USE_CTABLE) 218 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 219 col = col - 1; 220 #else 221 col = baij->colmap[in[j]/bs] - 1; 222 #endif 223 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 224 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 225 col = 
in[j]; 226 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 227 B = baij->B; 228 b = (Mat_SeqBAIJ*)(B)->data; 229 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 230 ba =b->a; 231 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 232 else col += in[j]%bs; 233 } else col = in[j]; 234 if (roworiented) value = v[i*n+j]; 235 else value = v[i+j*m]; 236 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 237 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 238 } 239 } 240 } else { 241 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 242 if (!baij->donotstash) { 243 mat->assembled = PETSC_FALSE; 244 if (roworiented) { 245 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 246 } else { 247 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 248 } 249 } 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 256 { 257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 258 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 259 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 260 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 261 PetscBool roworiented=a->roworiented; 262 const PetscScalar *value = v; 263 MatScalar *ap,*aa = a->a,*bap; 264 265 PetscFunctionBegin; 266 rp = aj + ai[row]; 267 ap = aa + bs2*ai[row]; 268 rmax = imax[row]; 269 nrow = ailen[row]; 270 value = v; 271 low = 0; 272 high = nrow; 273 while (high-low > 7) { 274 t = (low+high)/2; 275 if (rp[t] > col) high = t; 276 else low = t; 277 } 278 for (i=low; i<high; i++) { 279 if (rp[i] > col) 
break; 280 if (rp[i] == col) { 281 bap = ap + bs2*i; 282 if (roworiented) { 283 if (is == ADD_VALUES) { 284 for (ii=0; ii<bs; ii++) { 285 for (jj=ii; jj<bs2; jj+=bs) { 286 bap[jj] += *value++; 287 } 288 } 289 } else { 290 for (ii=0; ii<bs; ii++) { 291 for (jj=ii; jj<bs2; jj+=bs) { 292 bap[jj] = *value++; 293 } 294 } 295 } 296 } else { 297 if (is == ADD_VALUES) { 298 for (ii=0; ii<bs; ii++,value+=bs) { 299 for (jj=0; jj<bs; jj++) { 300 bap[jj] += value[jj]; 301 } 302 bap += bs; 303 } 304 } else { 305 for (ii=0; ii<bs; ii++,value+=bs) { 306 for (jj=0; jj<bs; jj++) { 307 bap[jj] = value[jj]; 308 } 309 bap += bs; 310 } 311 } 312 } 313 goto noinsert2; 314 } 315 } 316 if (nonew == 1) goto noinsert2; 317 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 318 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 319 N = nrow++ - 1; high++; 320 /* shift up all the later entries in this row */ 321 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 322 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 323 rp[i] = col; 324 bap = ap + bs2*i; 325 if (roworiented) { 326 for (ii=0; ii<bs; ii++) { 327 for (jj=ii; jj<bs2; jj+=bs) { 328 bap[jj] = *value++; 329 } 330 } 331 } else { 332 for (ii=0; ii<bs; ii++) { 333 for (jj=0; jj<bs; jj++) { 334 *bap++ = *value++; 335 } 336 } 337 } 338 noinsert2:; 339 ailen[row] = nrow; 340 PetscFunctionReturn(0); 341 } 342 343 /* 344 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 345 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 346 */ 347 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 348 { 349 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 350 const PetscScalar 
*value; 351 MatScalar *barray = baij->barray; 352 PetscBool roworiented = baij->roworiented; 353 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 354 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 355 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 356 357 PetscFunctionBegin; 358 if (!barray) { 359 PetscCall(PetscMalloc1(bs2,&barray)); 360 baij->barray = barray; 361 } 362 363 if (roworiented) stepval = (n-1)*bs; 364 else stepval = (m-1)*bs; 365 366 for (i=0; i<m; i++) { 367 if (im[i] < 0) continue; 368 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 369 if (im[i] >= rstart && im[i] < rend) { 370 row = im[i] - rstart; 371 for (j=0; j<n; j++) { 372 /* If NumCol = 1 then a copy is not required */ 373 if ((roworiented) && (n == 1)) { 374 barray = (MatScalar*)v + i*bs2; 375 } else if ((!roworiented) && (m == 1)) { 376 barray = (MatScalar*)v + j*bs2; 377 } else { /* Here a copy is required */ 378 if (roworiented) { 379 value = v + (i*(stepval+bs) + j)*bs; 380 } else { 381 value = v + (j*(stepval+bs) + i)*bs; 382 } 383 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 384 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 385 barray += bs; 386 } 387 barray -= bs2; 388 } 389 390 if (in[j] >= cstart && in[j] < cend) { 391 col = in[j] - cstart; 392 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 393 } else if (in[j] < 0) continue; 394 else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 395 else { 396 if (mat->was_assembled) { 397 if (!baij->colmap) { 398 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 399 } 400 401 #if defined(PETSC_USE_DEBUG) 402 #if defined(PETSC_USE_CTABLE) 403 { PetscInt data; 404 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 405 PetscCheck((data - 1) % bs == 
0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 406 } 407 #else 408 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 409 #endif 410 #endif 411 #if defined(PETSC_USE_CTABLE) 412 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 413 col = (col - 1)/bs; 414 #else 415 col = (baij->colmap[in[j]] - 1)/bs; 416 #endif 417 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 418 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 419 col = in[j]; 420 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 421 } else col = in[j]; 422 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 423 } 424 } 425 } else { 426 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 427 if (!baij->donotstash) { 428 if (roworiented) { 429 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 430 } else { 431 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #define HASH_KEY 0.6180339887 440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 441 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 442 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 443 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 444 { 445 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 446 PetscBool roworiented = baij->roworiented; 447 PetscInt i,j,row,col; 448 PetscInt rstart_orig=mat->rmap->rstart; 449 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 450 
PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 451 PetscReal tmp; 452 MatScalar **HD = baij->hd,value; 453 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 454 455 PetscFunctionBegin; 456 for (i=0; i<m; i++) { 457 if (PetscDefined(USE_DEBUG)) { 458 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 459 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 460 } 461 row = im[i]; 462 if (row >= rstart_orig && row < rend_orig) { 463 for (j=0; j<n; j++) { 464 col = in[j]; 465 if (roworiented) value = v[i*n+j]; 466 else value = v[i+j*m]; 467 /* Look up PetscInto the Hash Table */ 468 key = (row/bs)*Nbs+(col/bs)+1; 469 h1 = HASH(size,key,tmp); 470 471 idx = h1; 472 if (PetscDefined(USE_DEBUG)) { 473 insert_ct++; 474 total_ct++; 475 if (HT[idx] != key) { 476 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 477 if (idx == size) { 478 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 479 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 480 } 481 } 482 } else if (HT[idx] != key) { 483 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 484 if (idx == size) { 485 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 486 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 487 } 488 } 489 /* A HASH table entry is found, so insert the values at the correct address */ 490 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 491 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 492 } 493 } else if (!baij->donotstash) { 494 if (roworiented) { 495 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 496 } else { 497 
PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 498 } 499 } 500 } 501 if (PetscDefined(USE_DEBUG)) { 502 baij->ht_total_ct += total_ct; 503 baij->ht_insert_ct += insert_ct; 504 } 505 PetscFunctionReturn(0); 506 } 507 508 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 509 { 510 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 511 PetscBool roworiented = baij->roworiented; 512 PetscInt i,j,ii,jj,row,col; 513 PetscInt rstart=baij->rstartbs; 514 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 515 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 516 PetscReal tmp; 517 MatScalar **HD = baij->hd,*baij_a; 518 const PetscScalar *v_t,*value; 519 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 520 521 PetscFunctionBegin; 522 if (roworiented) stepval = (n-1)*bs; 523 else stepval = (m-1)*bs; 524 525 for (i=0; i<m; i++) { 526 if (PetscDefined(USE_DEBUG)) { 527 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 528 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 529 } 530 row = im[i]; 531 v_t = v + i*nbs2; 532 if (row >= rstart && row < rend) { 533 for (j=0; j<n; j++) { 534 col = in[j]; 535 536 /* Look up into the Hash Table */ 537 key = row*Nbs+col+1; 538 h1 = HASH(size,key,tmp); 539 540 idx = h1; 541 if (PetscDefined(USE_DEBUG)) { 542 total_ct++; 543 insert_ct++; 544 if (HT[idx] != key) { 545 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 546 if (idx == size) { 547 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 548 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 549 } 550 } 551 } else if (HT[idx] != key) 
{ 552 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 553 if (idx == size) { 554 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 555 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 556 } 557 } 558 baij_a = HD[idx]; 559 if (roworiented) { 560 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 561 /* value = v + (i*(stepval+bs)+j)*bs; */ 562 value = v_t; 563 v_t += bs; 564 if (addv == ADD_VALUES) { 565 for (ii=0; ii<bs; ii++,value+=stepval) { 566 for (jj=ii; jj<bs2; jj+=bs) { 567 baij_a[jj] += *value++; 568 } 569 } 570 } else { 571 for (ii=0; ii<bs; ii++,value+=stepval) { 572 for (jj=ii; jj<bs2; jj+=bs) { 573 baij_a[jj] = *value++; 574 } 575 } 576 } 577 } else { 578 value = v + j*(stepval+bs)*bs + i*bs; 579 if (addv == ADD_VALUES) { 580 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 581 for (jj=0; jj<bs; jj++) { 582 baij_a[jj] += *value++; 583 } 584 } 585 } else { 586 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 587 for (jj=0; jj<bs; jj++) { 588 baij_a[jj] = *value++; 589 } 590 } 591 } 592 } 593 } 594 } else { 595 if (!baij->donotstash) { 596 if (roworiented) { 597 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 598 } else { 599 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 600 } 601 } 602 } 603 } 604 if (PetscDefined(USE_DEBUG)) { 605 baij->ht_total_ct += total_ct; 606 baij->ht_insert_ct += insert_ct; 607 } 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 612 { 613 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 614 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 615 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 616 617 PetscFunctionBegin; 618 for (i=0; i<m; i++) { 619 if (idxm[i] < 0) continue; /* negative row */ 620 
PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 621 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 622 row = idxm[i] - bsrstart; 623 for (j=0; j<n; j++) { 624 if (idxn[j] < 0) continue; /* negative column */ 625 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 626 if (idxn[j] >= bscstart && idxn[j] < bscend) { 627 col = idxn[j] - bscstart; 628 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 629 } else { 630 if (!baij->colmap) { 631 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 632 } 633 #if defined(PETSC_USE_CTABLE) 634 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 635 data--; 636 #else 637 data = baij->colmap[idxn[j]/bs]-1; 638 #endif 639 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 640 else { 641 col = data + idxn[j]%bs; 642 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 643 } 644 } 645 } 646 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 652 { 653 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 654 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 655 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 656 PetscReal sum = 0.0; 657 MatScalar *v; 658 659 PetscFunctionBegin; 660 if (baij->size == 1) { 661 PetscCall(MatNorm(baij->A,type,nrm)); 662 } else { 663 if (type == NORM_FROBENIUS) { 664 v = amat->a; 665 nz = amat->nz*bs2; 666 for (i=0; i<nz; i++) { 667 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 668 } 669 v = bmat->a; 670 nz = bmat->nz*bs2; 671 for (i=0; i<nz; i++) { 672 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 673 } 674 
PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 675 *nrm = PetscSqrtReal(*nrm); 676 } else if (type == NORM_1) { /* max column sum */ 677 PetscReal *tmp,*tmp2; 678 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 679 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 680 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 681 v = amat->a; jj = amat->j; 682 for (i=0; i<amat->nz; i++) { 683 for (j=0; j<bs; j++) { 684 col = bs*(cstart + *jj) + j; /* column index */ 685 for (row=0; row<bs; row++) { 686 tmp[col] += PetscAbsScalar(*v); v++; 687 } 688 } 689 jj++; 690 } 691 v = bmat->a; jj = bmat->j; 692 for (i=0; i<bmat->nz; i++) { 693 for (j=0; j<bs; j++) { 694 col = bs*garray[*jj] + j; 695 for (row=0; row<bs; row++) { 696 tmp[col] += PetscAbsScalar(*v); v++; 697 } 698 } 699 jj++; 700 } 701 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 702 *nrm = 0.0; 703 for (j=0; j<mat->cmap->N; j++) { 704 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 705 } 706 PetscCall(PetscFree(tmp)); 707 PetscCall(PetscFree(tmp2)); 708 } else if (type == NORM_INFINITY) { /* max row sum */ 709 PetscReal *sums; 710 PetscCall(PetscMalloc1(bs,&sums)); 711 sum = 0.0; 712 for (j=0; j<amat->mbs; j++) { 713 for (row=0; row<bs; row++) sums[row] = 0.0; 714 v = amat->a + bs2*amat->i[j]; 715 nz = amat->i[j+1]-amat->i[j]; 716 for (i=0; i<nz; i++) { 717 for (col=0; col<bs; col++) { 718 for (row=0; row<bs; row++) { 719 sums[row] += PetscAbsScalar(*v); v++; 720 } 721 } 722 } 723 v = bmat->a + bs2*bmat->i[j]; 724 nz = bmat->i[j+1]-bmat->i[j]; 725 for (i=0; i<nz; i++) { 726 for (col=0; col<bs; col++) { 727 for (row=0; row<bs; row++) { 728 sums[row] += PetscAbsScalar(*v); v++; 729 } 730 } 731 } 732 for (row=0; row<bs; row++) { 733 if (sums[row] > sum) sum = sums[row]; 734 } 735 } 736 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 737 PetscCall(PetscFree(sums)); 738 } else 
SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 /* 744 Creates the hash table, and sets the table 745 This table is created only once. 746 If new entried need to be added to the matrix 747 then the hash table has to be destroyed and 748 recreated. 749 */ 750 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 751 { 752 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 753 Mat A = baij->A,B=baij->B; 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 755 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 756 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 757 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 758 PetscInt *HT,key; 759 MatScalar **HD; 760 PetscReal tmp; 761 #if defined(PETSC_USE_INFO) 762 PetscInt ct=0,max=0; 763 #endif 764 765 PetscFunctionBegin; 766 if (baij->ht) PetscFunctionReturn(0); 767 768 baij->ht_size = (PetscInt)(factor*nz); 769 ht_size = baij->ht_size; 770 771 /* Allocate Memory for Hash Table */ 772 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 773 HD = baij->hd; 774 HT = baij->ht; 775 776 /* Loop Over A */ 777 for (i=0; i<a->mbs; i++) { 778 for (j=ai[i]; j<ai[i+1]; j++) { 779 row = i+rstart; 780 col = aj[j]+cstart; 781 782 key = row*Nbs + col + 1; 783 h1 = HASH(ht_size,key,tmp); 784 for (k=0; k<ht_size; k++) { 785 if (!HT[(h1+k)%ht_size]) { 786 HT[(h1+k)%ht_size] = key; 787 HD[(h1+k)%ht_size] = a->a + j*bs2; 788 break; 789 #if defined(PETSC_USE_INFO) 790 } else { 791 ct++; 792 #endif 793 } 794 } 795 #if defined(PETSC_USE_INFO) 796 if (k> max) max = k; 797 #endif 798 } 799 } 800 /* Loop Over B */ 801 for (i=0; i<b->mbs; i++) { 802 for (j=bi[i]; j<bi[i+1]; j++) { 803 row = i+rstart; 804 col = garray[bj[j]]; 805 key = row*Nbs + col + 1; 806 h1 = HASH(ht_size,key,tmp); 807 for (k=0; k<ht_size; k++) { 808 if (!HT[(h1+k)%ht_size]) { 809 HT[(h1+k)%ht_size] = 
key; 810 HD[(h1+k)%ht_size] = b->a + j*bs2; 811 break; 812 #if defined(PETSC_USE_INFO) 813 } else { 814 ct++; 815 #endif 816 } 817 } 818 #if defined(PETSC_USE_INFO) 819 if (k> max) max = k; 820 #endif 821 } 822 } 823 824 /* Print Summary */ 825 #if defined(PETSC_USE_INFO) 826 for (i=0,j=0; i<ht_size; i++) { 827 if (HT[i]) j++; 828 } 829 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 830 #endif 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 835 { 836 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 837 PetscInt nstash,reallocs; 838 839 PetscFunctionBegin; 840 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 841 842 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 843 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 844 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 845 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 846 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 847 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 852 { 853 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 854 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 PetscMPIInt n; 860 861 PetscFunctionBegin; 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 if (!baij->donotstash && !mat->nooffprocentries) { 864 while (1) { 865 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 866 if (!flg) 
break; 867 868 for (i=0; i<n;) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for (j=i,rstart=row[j]; j<n; j++) { 871 if (row[j] != rstart) break; 872 } 873 if (j < n) ncols = j-i; 874 else ncols = n-i; 875 /* Now assemble all these values with a single function call */ 876 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 877 i = j; 878 } 879 } 880 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 881 /* Now process the block-stash. Since the values are stashed column-oriented, 882 set the roworiented flag to column oriented, and after MatSetValues() 883 restore the original flags */ 884 r1 = baij->roworiented; 885 r2 = a->roworiented; 886 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 887 888 baij->roworiented = PETSC_FALSE; 889 a->roworiented = PETSC_FALSE; 890 891 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 892 while (1) { 893 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 894 if (!flg) break; 895 896 for (i=0; i<n;) { 897 /* Now identify the consecutive vals belonging to the same row */ 898 for (j=i,rstart=row[j]; j<n; j++) { 899 if (row[j] != rstart) break; 900 } 901 if (j < n) ncols = j-i; 902 else ncols = n-i; 903 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 904 i = j; 905 } 906 } 907 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 908 909 baij->roworiented = r1; 910 a->roworiented = r2; 911 912 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 913 } 914 915 PetscCall(MatAssemblyBegin(baij->A,mode)); 916 PetscCall(MatAssemblyEnd(baij->A,mode)); 917 918 /* determine if any processor has disassembled, if so we must 919 also disassemble ourselves, in order that we may reassemble. 
*/ 920 /* 921 if nonzero structure of submatrix B cannot change then we know that 922 no processor disassembled thus we can skip this stuff 923 */ 924 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 925 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 926 if (mat->was_assembled && !other_disassembled) { 927 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 928 } 929 } 930 931 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 932 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 933 } 934 PetscCall(MatAssemblyBegin(baij->B,mode)); 935 PetscCall(MatAssemblyEnd(baij->B,mode)); 936 937 #if defined(PETSC_USE_INFO) 938 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 939 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 940 941 baij->ht_total_ct = 0; 942 baij->ht_insert_ct = 0; 943 } 944 #endif 945 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 946 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 947 948 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 949 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 950 } 951 952 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 953 954 baij->rowvalues = NULL; 955 956 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 957 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 958 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 959 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 960 } 961 PetscFunctionReturn(0); 962 } 963 964 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 965 #include <petscdraw.h> 966 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 967 { 968 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 969 
PetscMPIInt rank = baij->rank; 970 PetscInt bs = mat->rmap->bs; 971 PetscBool iascii,isdraw; 972 PetscViewer sviewer; 973 PetscViewerFormat format; 974 975 PetscFunctionBegin; 976 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 977 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 978 if (iascii) { 979 PetscCall(PetscViewerGetFormat(viewer,&format)); 980 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 981 MatInfo info; 982 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 983 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 984 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 985 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 986 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 987 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 989 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 990 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 991 PetscCall(PetscViewerFlush(viewer)); 992 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 993 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 994 PetscCall(VecScatterView(baij->Mvctx,viewer)); 995 PetscFunctionReturn(0); 996 } else if (format == PETSC_VIEWER_ASCII_INFO) { 997 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 998 PetscFunctionReturn(0); 999 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1000 PetscFunctionReturn(0); 1001 } 1002 } 1003 1004 if (isdraw) { 1005 PetscDraw draw; 1006 PetscBool isnull; 1007 
PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1008 PetscCall(PetscDrawIsNull(draw,&isnull)); 1009 if (isnull) PetscFunctionReturn(0); 1010 } 1011 1012 { 1013 /* assemble the entire matrix onto first processor. */ 1014 Mat A; 1015 Mat_SeqBAIJ *Aloc; 1016 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1017 MatScalar *a; 1018 const char *matname; 1019 1020 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1021 /* Perhaps this should be the type of mat? */ 1022 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1023 if (rank == 0) { 1024 PetscCall(MatSetSizes(A,M,N,M,N)); 1025 } else { 1026 PetscCall(MatSetSizes(A,0,0,M,N)); 1027 } 1028 PetscCall(MatSetType(A,MATMPIBAIJ)); 1029 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1030 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1031 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1032 1033 /* copy over the A part */ 1034 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1035 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1036 PetscCall(PetscMalloc1(bs,&rvals)); 1037 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = (baij->cstartbs+aj[j])*bs; 1043 for (k=0; k<bs; k++) { 1044 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 /* copy over the B part */ 1050 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1051 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1052 for (i=0; i<mbs; i++) { 1053 rvals[0] = bs*(baij->rstartbs + i); 1054 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1055 for (j=ai[i]; j<ai[i+1]; j++) { 1056 col = baij->garray[aj[j]]*bs; 1057 for (k=0; k<bs; k++) { 1058 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1059 col++; a += bs; 1060 } 1061 } 1062 } 1063 PetscCall(PetscFree(rvals)); 1064 
PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1065 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1066 /* 1067 Everyone has to call to draw the matrix since the graphics waits are 1068 synchronized across all processors that share the PetscDraw object 1069 */ 1070 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1071 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1072 if (rank == 0) { 1073 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1074 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1075 } 1076 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1077 PetscCall(PetscViewerFlush(viewer)); 1078 PetscCall(MatDestroy(&A)); 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1084 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1085 { 1086 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1087 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1088 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1089 const PetscInt *garray = aij->garray; 1090 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1091 PetscInt *rowlens,*colidxs; 1092 PetscScalar *matvals; 1093 1094 PetscFunctionBegin; 1095 PetscCall(PetscViewerSetUp(viewer)); 1096 1097 M = mat->rmap->N; 1098 N = mat->cmap->N; 1099 m = mat->rmap->n; 1100 rs = mat->rmap->rstart; 1101 cs = mat->cmap->rstart; 1102 bs = mat->rmap->bs; 1103 nz = bs*bs*(A->nz + B->nz); 1104 1105 /* write matrix header */ 1106 header[0] = MAT_FILE_CLASSID; 1107 header[1] = M; header[2] = N; header[3] = nz; 1108 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1109 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1110 1111 /* fill in and store row lengths */ 1112 PetscCall(PetscMalloc1(m,&rowlens)); 1113 for (cnt=0, i=0; i<A->mbs; i++) 1114 for (j=0; j<bs; j++) 1115 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] 
- B->i[i]); 1116 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1117 PetscCall(PetscFree(rowlens)); 1118 1119 /* fill in and store column indices */ 1120 PetscCall(PetscMalloc1(nz,&colidxs)); 1121 for (cnt=0, i=0; i<A->mbs; i++) { 1122 for (k=0; k<bs; k++) { 1123 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1124 if (garray[B->j[jb]] > cs/bs) break; 1125 for (l=0; l<bs; l++) 1126 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1127 } 1128 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1131 for (; jb<B->i[i+1]; jb++) 1132 for (l=0; l<bs; l++) 1133 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1134 } 1135 } 1136 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1137 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1138 PetscCall(PetscFree(colidxs)); 1139 1140 /* fill in and store nonzero values */ 1141 PetscCall(PetscMalloc1(nz,&matvals)); 1142 for (cnt=0, i=0; i<A->mbs; i++) { 1143 for (k=0; k<bs; k++) { 1144 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1145 if (garray[B->j[jb]] > cs/bs) break; 1146 for (l=0; l<bs; l++) 1147 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1148 } 1149 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1152 for (; jb<B->i[i+1]; jb++) 1153 for (l=0; l<bs; l++) 1154 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1155 } 1156 } 1157 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1158 PetscCall(PetscFree(matvals)); 1159 1160 /* write block size option to the viewer's .info file */ 1161 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1166 { 1167 PetscBool iascii,isdraw,issocket,isbinary; 1168 1169 PetscFunctionBegin; 1170 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1171 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1172 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1173 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1174 if (iascii || isdraw || issocket) { 1175 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1176 } else if (isbinary) { 1177 PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1183 { 1184 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1185 1186 PetscFunctionBegin; 1187 #if defined(PETSC_USE_LOG) 1188 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1189 #endif 1190 PetscCall(MatStashDestroy_Private(&mat->stash)); 1191 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1192 PetscCall(MatDestroy(&baij->A)); 1193 PetscCall(MatDestroy(&baij->B)); 1194 #if defined(PETSC_USE_CTABLE) 1195 PetscCall(PetscTableDestroy(&baij->colmap)); 1196 #else 1197 PetscCall(PetscFree(baij->colmap)); 1198 #endif 1199 PetscCall(PetscFree(baij->garray)); 1200 PetscCall(VecDestroy(&baij->lvec)); 1201 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1202 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1203 PetscCall(PetscFree(baij->barray)); 1204 PetscCall(PetscFree2(baij->hd,baij->ht)); 1205 PetscCall(PetscFree(baij->rangebs)); 1206 PetscCall(PetscFree(mat->data)); 1207 1208 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1209 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1210 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1212 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL)); 1217 #if defined(PETSC_HAVE_HYPRE) 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1219 #endif 1220 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1225 { 1226 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1227 PetscInt nt; 1228 1229 PetscFunctionBegin; 1230 PetscCall(VecGetLocalSize(xx,&nt)); 1231 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1232 PetscCall(VecGetLocalSize(yy,&nt)); 1233 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1234 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1235 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1236 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1237 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1242 { 1243 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1244 1245 PetscFunctionBegin; 1246 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1247 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1248 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1249 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 
/* y = A^T x: transpose-multiply both blocks locally, then scatter-add the off-diagonal contribution back */
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* z = y + A^T x, same structure as MatMultTranspose */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
  PetscFunctionReturn(0);
}

/* A = aa*A: scale both the diagonal and off-diagonal sequential blocks */
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Return one locally owned (point) row, merging the diag (A) and off-diag (B) parts
   into globally increasing column order; results live in per-matrix work arrays
   and remain valid until MatRestoreRow() */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from the sub-matrices the outputs the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark marks where B's columns cross from left of the diagonal block to right of it */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark has not been computed yet; do it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow: just clears the active flag (work arrays are reused) */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Zero all stored entries (nonzero pattern is kept) */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Gather MatInfo statistics: sum A and B locally, then reduce (max or sum) across ranks as requested */
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handle MatSetOption for the parallel matrix, forwarding to the sequential blocks where relevant */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;  /* default hash table over-allocation factor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    /* symmetry-type options only apply to the (square) diagonal block */
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* B = A^T, built by inserting each block of A at the transposed (col,row) location */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  PetscInt    M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt    bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  PetscCall(PetscMalloc1(bs,&rvals));

  for (i=0; i<mbs; i++) {
    /* rvals[] are the bs consecutive global (point) rows of block row i; inserted as columns of B */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));

        col++; a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local off-diagonal block columns to global block columns */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* in-place transpose: move B's guts into A */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/* A = diag(ll)*A*diag(rr); the scatter of rr to ghost entries is overlapped with the local scaling */
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a     = baij->A,b = baij->B;
  PetscInt    s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  }
  PetscFunctionReturn(0);
}

/* Zero the given global rows; optionally put diag on the diagonal and fix up b = diag*x for those rows */
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
  PetscInt    *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArray(b,&bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArray(b,&bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  if ((diag != 0.0) && cong) {
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may be off-rank, so fall back to MatSetValues
       (which requires the nonzero pattern to be allowed to change) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
    PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* Zero the given global rows AND columns; column zeroing requires communicating
   which ghost columns are affected (via an SF and a mask vector), then directly
   editing the raw BAIJ storage of the off-diagonal block */
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt          bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;   /* mark zeroed rows; scatter tells each rank which ghost columns die */
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix: walk the raw bs x bs blocks of row lrows[i] */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;   /* blocks are stored column-major, so stepping by bs walks one point row */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];   /* move the known value to the right hand side */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* Clear the factored state (only the diagonal block carries it) */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

/* Compare both blocks locally, then require agreement on every rank */
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B, using the fast block-wise path only when patterns match and both are MPIBAIJ */
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default MatSetUp: preallocate with default parameters */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/* Compute per-row nonzero counts needed to preallocate Y+X for the off-diagonal parts,
   using the local-to-global maps of both matrices */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt    bs = Y->rmap->bs,m = Y->rmap->N/bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}

/* Y = Y + a*X: BLAS axpy on the raw value arrays when patterns match exactly,
   otherwise fall back to (pre)allocated basic implementations */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIBAIJ  *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt bnz,one=1;
  Mat_SeqBAIJ  *x,*y;
  PetscInt     bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    /* unknown relation between patterns: build a new matrix sized for the union, then merge it into Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}

/* Complex-conjugate all entries (no-op for real builds) */
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(0);
}

/* Keep only the real part of all entries */
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of all entries */
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Extract a parallel submatrix; the gathered column IS is cached on the result
   (composed as "ISAllGather") so MAT_REUSE_MATRIX calls can recover it */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol,&iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt rank,size;
  PetscInt    i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt    *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         M,Mreuse;
  MatScalar   *vwork,*aa;
  MPI_Comm    comm;
  IS          isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided.
Doesn't point 1924 out errors, might change the indices, hence buggey */ 1925 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1926 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1927 1928 if (call == MAT_REUSE_MATRIX) { 1929 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1930 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1931 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1932 } else { 1933 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1934 } 1935 PetscCall(ISDestroy(&isrow_new)); 1936 PetscCall(ISDestroy(&iscol_new)); 1937 /* 1938 m - number of local rows 1939 n - number of columns (same on all processors) 1940 rstart - first row in new global matrix generated 1941 */ 1942 PetscCall(MatGetBlockSize(mat,&bs)); 1943 PetscCall(MatGetSize(Mreuse,&m,&n)); 1944 m = m/bs; 1945 n = n/bs; 1946 1947 if (call == MAT_INITIAL_MATRIX) { 1948 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1949 ii = aij->i; 1950 jj = aij->j; 1951 1952 /* 1953 Determine the number of non-zeros in the diagonal and off-diagonal 1954 portions of the matrix in order to do correct preallocation 1955 */ 1956 1957 /* first get start and end of "diagonal" columns */ 1958 if (csize == PETSC_DECIDE) { 1959 PetscCall(ISGetSize(isrow,&mglobal)); 1960 if (mglobal == n*bs) { /* square matrix */ 1961 nlocal = m; 1962 } else { 1963 nlocal = n/size + ((n % size) > rank); 1964 } 1965 } else { 1966 nlocal = csize/bs; 1967 } 1968 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1969 rstart = rend - nlocal; 1970 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1971 1972 /* 
next, compute all the lengths */ 1973 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1974 for (i=0; i<m; i++) { 1975 jend = ii[i+1] - ii[i]; 1976 olen = 0; 1977 dlen = 0; 1978 for (j=0; j<jend; j++) { 1979 if (*jj < rstart || *jj >= rend) olen++; 1980 else dlen++; 1981 jj++; 1982 } 1983 olens[i] = olen; 1984 dlens[i] = dlen; 1985 } 1986 PetscCall(MatCreate(comm,&M)); 1987 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1988 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1989 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1990 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1991 PetscCall(PetscFree2(dlens,olens)); 1992 } else { 1993 PetscInt ml,nl; 1994 1995 M = *newmat; 1996 PetscCall(MatGetLocalSize(M,&ml,&nl)); 1997 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1998 PetscCall(MatZeroEntries(M)); 1999 /* 2000 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2001 rather than the slower MatSetValues(). 
2002 */ 2003 M->was_assembled = PETSC_TRUE; 2004 M->assembled = PETSC_FALSE; 2005 } 2006 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2007 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2008 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2009 ii = aij->i; 2010 jj = aij->j; 2011 aa = aij->a; 2012 for (i=0; i<m; i++) { 2013 row = rstart/bs + i; 2014 nz = ii[i+1] - ii[i]; 2015 cwork = jj; jj += nz; 2016 vwork = aa; aa += nz*bs*bs; 2017 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2018 } 2019 2020 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2021 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2022 *newmat = M; 2023 2024 /* save submatrix used in processor for next request */ 2025 if (call == MAT_INITIAL_MATRIX) { 2026 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2027 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2033 { 2034 MPI_Comm comm,pcomm; 2035 PetscInt clocal_size,nrows; 2036 const PetscInt *rows; 2037 PetscMPIInt size; 2038 IS crowp,lcolp; 2039 2040 PetscFunctionBegin; 2041 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2042 /* make a collective version of 'rowp' */ 2043 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2044 if (pcomm==comm) { 2045 crowp = rowp; 2046 } else { 2047 PetscCall(ISGetSize(rowp,&nrows)); 2048 PetscCall(ISGetIndices(rowp,&rows)); 2049 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2050 PetscCall(ISRestoreIndices(rowp,&rows)); 2051 } 2052 PetscCall(ISSetPermutation(crowp)); 2053 /* make a local version of 'colp' */ 2054 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2055 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2056 if (size==1) { 2057 lcolp = colp; 2058 } else { 2059 PetscCall(ISAllGather(colp,&lcolp)); 2060 } 2061 PetscCall(ISSetPermutation(lcolp)); 2062 /* now we just get the 
submatrix */ 2063 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2064 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2065 /* clean up */ 2066 if (pcomm!=comm) { 2067 PetscCall(ISDestroy(&crowp)); 2068 } 2069 if (size>1) { 2070 PetscCall(ISDestroy(&lcolp)); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2076 { 2077 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2078 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2079 2080 PetscFunctionBegin; 2081 if (nghosts) *nghosts = B->nbs; 2082 if (ghosts) *ghosts = baij->garray; 2083 PetscFunctionReturn(0); 2084 } 2085 2086 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2087 { 2088 Mat B; 2089 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2090 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2091 Mat_SeqAIJ *b; 2092 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2093 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2094 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2095 2096 PetscFunctionBegin; 2097 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2098 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2099 2100 /* ---------------------------------------------------------------- 2101 Tell every processor the number of nonzeros per row 2102 */ 2103 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2104 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2105 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2106 } 2107 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2108 displs = recvcounts + size; 2109 for (i=0; i<size; i++) { 2110 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2111 displs[i] = A->rmap->range[i]/bs; 2112 } 2113 
PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2114 /* --------------------------------------------------------------- 2115 Create the sequential matrix of the same type as the local block diagonal 2116 */ 2117 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2118 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2119 PetscCall(MatSetType(B,MATSEQAIJ)); 2120 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2121 b = (Mat_SeqAIJ*)B->data; 2122 2123 /*-------------------------------------------------------------------- 2124 Copy my part of matrix column indices over 2125 */ 2126 sendcount = ad->nz + bd->nz; 2127 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2128 a_jsendbuf = ad->j; 2129 b_jsendbuf = bd->j; 2130 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2131 cnt = 0; 2132 for (i=0; i<n; i++) { 2133 2134 /* put in lower diagonal portion */ 2135 m = bd->i[i+1] - bd->i[i]; 2136 while (m > 0) { 2137 /* is it above diagonal (in bd (compressed) numbering) */ 2138 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2139 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2140 m--; 2141 } 2142 2143 /* put in diagonal portion */ 2144 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2145 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2146 } 2147 2148 /* put in upper diagonal portion */ 2149 while (m-- > 0) { 2150 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2151 } 2152 } 2153 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2154 2155 /*-------------------------------------------------------------------- 2156 Gather all column indices to all processors 2157 */ 2158 for (i=0; i<size; i++) { 2159 recvcounts[i] = 0; 2160 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2161 recvcounts[i] += lens[j]; 2162 } 2163 } 2164 displs[0] = 0; 2165 for (i=1; i<size; i++) { 2166 displs[i] 
= displs[i-1] + recvcounts[i-1]; 2167 } 2168 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2169 /*-------------------------------------------------------------------- 2170 Assemble the matrix into useable form (note numerical values not yet set) 2171 */ 2172 /* set the b->ilen (length of each row) values */ 2173 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2174 /* set the b->i indices */ 2175 b->i[0] = 0; 2176 for (i=1; i<=A->rmap->N/bs; i++) { 2177 b->i[i] = b->i[i-1] + lens[i-1]; 2178 } 2179 PetscCall(PetscFree(lens)); 2180 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2181 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2182 PetscCall(PetscFree(recvcounts)); 2183 2184 if (A->symmetric) { 2185 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 2186 } else if (A->hermitian) { 2187 PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE)); 2188 } else if (A->structurally_symmetric) { 2189 PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 2190 } 2191 *newmat = B; 2192 PetscFunctionReturn(0); 2193 } 2194 2195 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2196 { 2197 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2198 Vec bb1 = NULL; 2199 2200 PetscFunctionBegin; 2201 if (flag == SOR_APPLY_UPPER) { 2202 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2207 PetscCall(VecDuplicate(bb,&bb1)); 2208 } 2209 2210 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2211 if (flag & SOR_ZERO_INITIAL_GUESS) { 2212 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2213 its--; 2214 } 2215 2216 while (its--) { 2217 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2218 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2219 2220 /* update rhs: bb1 = bb - B*x */ 2221 PetscCall(VecScale(mat->lvec,-1.0)); 2222 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2223 2224 /* local sweep */ 2225 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2226 } 2227 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2228 if (flag & SOR_ZERO_INITIAL_GUESS) { 2229 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2230 its--; 2231 } 2232 while (its--) { 2233 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2234 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2235 2236 /* update rhs: bb1 = bb - B*x */ 2237 PetscCall(VecScale(mat->lvec,-1.0)); 2238 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2239 2240 /* local sweep */ 2241 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2242 } 2243 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2244 if (flag & SOR_ZERO_INITIAL_GUESS) { 2245 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2246 its--; 2247 } 2248 while (its--) { 2249 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2250 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2251 2252 /* update rhs: bb1 = bb - B*x */ 2253 PetscCall(VecScale(mat->lvec,-1.0)); 2254 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2255 2256 /* local sweep */ 2257 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2258 } 2259 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2260 2261 PetscCall(VecDestroy(&bb1)); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2266 { 2267 Mat_MPIBAIJ 
*aij = (Mat_MPIBAIJ*)A->data; 2268 PetscInt m,N,i,*garray = aij->garray; 2269 PetscInt ib,jb,bs = A->rmap->bs; 2270 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2271 MatScalar *a_val = a_aij->a; 2272 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2273 MatScalar *b_val = b_aij->a; 2274 PetscReal *work; 2275 2276 PetscFunctionBegin; 2277 PetscCall(MatGetSize(A,&m,&N)); 2278 PetscCall(PetscCalloc1(N,&work)); 2279 if (type == NORM_2) { 2280 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2281 for (jb=0; jb<bs; jb++) { 2282 for (ib=0; ib<bs; ib++) { 2283 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2284 a_val++; 2285 } 2286 } 2287 } 2288 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2289 for (jb=0; jb<bs; jb++) { 2290 for (ib=0; ib<bs; ib++) { 2291 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2292 b_val++; 2293 } 2294 } 2295 } 2296 } else if (type == NORM_1) { 2297 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2298 for (jb=0; jb<bs; jb++) { 2299 for (ib=0; ib<bs; ib++) { 2300 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2301 a_val++; 2302 } 2303 } 2304 } 2305 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2306 for (jb=0; jb<bs; jb++) { 2307 for (ib=0; ib<bs; ib++) { 2308 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2309 b_val++; 2310 } 2311 } 2312 } 2313 } else if (type == NORM_INFINITY) { 2314 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2315 for (jb=0; jb<bs; jb++) { 2316 for (ib=0; ib<bs; ib++) { 2317 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2318 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2319 a_val++; 2320 } 2321 } 2322 } 2323 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2324 for (jb=0; jb<bs; jb++) { 2325 for (ib=0; ib<bs; ib++) { 2326 int col = garray[b_aij->j[i]] * bs + jb; 2327 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2328 b_val++; 
2329 } 2330 } 2331 } 2332 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2333 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2334 for (jb=0; jb<bs; jb++) { 2335 for (ib=0; ib<bs; ib++) { 2336 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2337 a_val++; 2338 } 2339 } 2340 } 2341 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2342 for (jb=0; jb<bs; jb++) { 2343 for (ib=0; ib<bs; ib++) { 2344 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2345 b_val++; 2346 } 2347 } 2348 } 2349 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2350 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2351 for (jb=0; jb<bs; jb++) { 2352 for (ib=0; ib<bs; ib++) { 2353 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2354 a_val++; 2355 } 2356 } 2357 } 2358 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2359 for (jb=0; jb<bs; jb++) { 2360 for (ib=0; ib<bs; ib++) { 2361 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2362 b_val++; 2363 } 2364 } 2365 } 2366 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2367 if (type == NORM_INFINITY) { 2368 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2369 } else { 2370 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2371 } 2372 PetscCall(PetscFree(work)); 2373 if (type == NORM_2) { 2374 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2375 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2376 for (i=0; i<N; i++) reductions[i] /= m; 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2382 { 2383 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2384 2385 PetscFunctionBegin; 2386 
PetscCall(MatInvertBlockDiagonal(a->A,values)); 2387 A->factorerrortype = a->A->factorerrortype; 2388 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2389 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2394 { 2395 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2396 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2397 2398 PetscFunctionBegin; 2399 if (!Y->preallocated) { 2400 PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2401 } else if (!aij->nz) { 2402 PetscInt nonew = aij->nonew; 2403 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2404 aij->nonew = nonew; 2405 } 2406 PetscCall(MatShift_Basic(Y,a)); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2411 { 2412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2413 2414 PetscFunctionBegin; 2415 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2416 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2417 if (d) { 2418 PetscInt rstart; 2419 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2420 *d += rstart/A->rmap->bs; 2421 2422 } 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2427 { 2428 PetscFunctionBegin; 2429 *a = ((Mat_MPIBAIJ*)A->data)->A; 2430 PetscFunctionReturn(0); 2431 } 2432 2433 /* -------------------------------------------------------------------*/ 2434 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2435 MatGetRow_MPIBAIJ, 2436 MatRestoreRow_MPIBAIJ, 2437 MatMult_MPIBAIJ, 2438 /* 4*/ MatMultAdd_MPIBAIJ, 2439 MatMultTranspose_MPIBAIJ, 2440 MatMultTransposeAdd_MPIBAIJ, 2441 NULL, 2442 NULL, 2443 NULL, 2444 /*10*/ NULL, 2445 NULL, 2446 NULL, 2447 MatSOR_MPIBAIJ, 2448 MatTranspose_MPIBAIJ, 2449 /*15*/ MatGetInfo_MPIBAIJ, 2450 MatEqual_MPIBAIJ, 2451 
MatGetDiagonal_MPIBAIJ, 2452 MatDiagonalScale_MPIBAIJ, 2453 MatNorm_MPIBAIJ, 2454 /*20*/ MatAssemblyBegin_MPIBAIJ, 2455 MatAssemblyEnd_MPIBAIJ, 2456 MatSetOption_MPIBAIJ, 2457 MatZeroEntries_MPIBAIJ, 2458 /*24*/ MatZeroRows_MPIBAIJ, 2459 NULL, 2460 NULL, 2461 NULL, 2462 NULL, 2463 /*29*/ MatSetUp_MPIBAIJ, 2464 NULL, 2465 NULL, 2466 MatGetDiagonalBlock_MPIBAIJ, 2467 NULL, 2468 /*34*/ MatDuplicate_MPIBAIJ, 2469 NULL, 2470 NULL, 2471 NULL, 2472 NULL, 2473 /*39*/ MatAXPY_MPIBAIJ, 2474 MatCreateSubMatrices_MPIBAIJ, 2475 MatIncreaseOverlap_MPIBAIJ, 2476 MatGetValues_MPIBAIJ, 2477 MatCopy_MPIBAIJ, 2478 /*44*/ NULL, 2479 MatScale_MPIBAIJ, 2480 MatShift_MPIBAIJ, 2481 NULL, 2482 MatZeroRowsColumns_MPIBAIJ, 2483 /*49*/ NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 NULL, 2488 /*54*/ MatFDColoringCreate_MPIXAIJ, 2489 NULL, 2490 MatSetUnfactored_MPIBAIJ, 2491 MatPermute_MPIBAIJ, 2492 MatSetValuesBlocked_MPIBAIJ, 2493 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2494 MatDestroy_MPIBAIJ, 2495 MatView_MPIBAIJ, 2496 NULL, 2497 NULL, 2498 /*64*/ NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2504 NULL, 2505 NULL, 2506 NULL, 2507 NULL, 2508 /*74*/ NULL, 2509 MatFDColoringApply_BAIJ, 2510 NULL, 2511 NULL, 2512 NULL, 2513 /*79*/ NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 MatLoad_MPIBAIJ, 2518 /*84*/ NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 /*89*/ NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 /*94*/ NULL, 2529 NULL, 2530 NULL, 2531 NULL, 2532 NULL, 2533 /*99*/ NULL, 2534 NULL, 2535 NULL, 2536 MatConjugate_MPIBAIJ, 2537 NULL, 2538 /*104*/NULL, 2539 MatRealPart_MPIBAIJ, 2540 MatImaginaryPart_MPIBAIJ, 2541 NULL, 2542 NULL, 2543 /*109*/NULL, 2544 NULL, 2545 NULL, 2546 NULL, 2547 MatMissingDiagonal_MPIBAIJ, 2548 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2549 NULL, 2550 MatGetGhosts_MPIBAIJ, 2551 NULL, 2552 NULL, 2553 /*119*/NULL, 2554 NULL, 2555 NULL, 2556 NULL, 2557 MatGetMultiProcBlock_MPIBAIJ, 2558 /*124*/NULL, 2559 
MatGetColumnReductions_MPIBAIJ, 2560 MatInvertBlockDiagonal_MPIBAIJ, 2561 NULL, 2562 NULL, 2563 /*129*/ NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 NULL, 2568 /*134*/ NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 NULL, 2573 /*139*/ MatSetBlockSizes_Default, 2574 NULL, 2575 NULL, 2576 MatFDColoringSetUp_MPIXAIJ, 2577 NULL, 2578 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2579 NULL, 2580 NULL, 2581 NULL 2582 }; 2583 2584 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2585 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2586 2587 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2588 { 2589 PetscInt m,rstart,cstart,cend; 2590 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2591 const PetscInt *JJ =NULL; 2592 PetscScalar *values=NULL; 2593 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2594 PetscBool nooffprocentries; 2595 2596 PetscFunctionBegin; 2597 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2598 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2599 PetscCall(PetscLayoutSetUp(B->rmap)); 2600 PetscCall(PetscLayoutSetUp(B->cmap)); 2601 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2602 m = B->rmap->n/bs; 2603 rstart = B->rmap->rstart/bs; 2604 cstart = B->cmap->rstart/bs; 2605 cend = B->cmap->rend/bs; 2606 2607 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2608 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2609 for (i=0; i<m; i++) { 2610 nz = ii[i+1] - ii[i]; 2611 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2612 nz_max = PetscMax(nz_max,nz); 2613 dlen = 0; 2614 olen = 0; 2615 JJ = jj + ii[i]; 2616 for (j=0; j<nz; j++) { 2617 if (*JJ < cstart || *JJ >= cend) olen++; 2618 else dlen++; 2619 JJ++; 2620 } 2621 d_nnz[i] = dlen; 2622 
o_nnz[i] = olen; 2623 } 2624 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2625 PetscCall(PetscFree2(d_nnz,o_nnz)); 2626 2627 values = (PetscScalar*)V; 2628 if (!values) { 2629 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2630 } 2631 for (i=0; i<m; i++) { 2632 PetscInt row = i + rstart; 2633 PetscInt ncols = ii[i+1] - ii[i]; 2634 const PetscInt *icols = jj + ii[i]; 2635 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2636 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2638 } else { /* block ordering does not match so we can only insert one block at a time. */ 2639 PetscInt j; 2640 for (j=0; j<ncols; j++) { 2641 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2642 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2643 } 2644 } 2645 } 2646 2647 if (!V) PetscCall(PetscFree(values)); 2648 nooffprocentries = B->nooffprocentries; 2649 B->nooffprocentries = PETSC_TRUE; 2650 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2651 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2652 B->nooffprocentries = nooffprocentries; 2653 2654 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /*@C 2659 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2660 2661 Collective 2662 2663 Input Parameters: 2664 + B - the matrix 2665 . bs - the block size 2666 . i - the indices into j for the start of each local row (starts with zero) 2667 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2668 - v - optional values in the matrix 2669 2670 Level: advanced 2671 2672 Notes: 2673 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2674 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2675 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2676 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2677 block column and the second index is over columns within a block. 2678 2679 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2680 2681 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2682 @*/ 2683 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2684 { 2685 PetscFunctionBegin; 2686 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2687 PetscValidType(B,1); 2688 PetscValidLogicalCollectiveInt(B,bs,2); 2689 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2694 { 2695 Mat_MPIBAIJ *b; 2696 PetscInt i; 2697 PetscMPIInt size; 2698 2699 PetscFunctionBegin; 2700 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2701 PetscCall(PetscLayoutSetUp(B->rmap)); 2702 PetscCall(PetscLayoutSetUp(B->cmap)); 2703 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2704 2705 if (d_nnz) { 2706 for 
(i=0; i<B->rmap->n/bs; i++) { 2707 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2708 } 2709 } 2710 if (o_nnz) { 2711 for (i=0; i<B->rmap->n/bs; i++) { 2712 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2713 } 2714 } 2715 2716 b = (Mat_MPIBAIJ*)B->data; 2717 b->bs2 = bs*bs; 2718 b->mbs = B->rmap->n/bs; 2719 b->nbs = B->cmap->n/bs; 2720 b->Mbs = B->rmap->N/bs; 2721 b->Nbs = B->cmap->N/bs; 2722 2723 for (i=0; i<=b->size; i++) { 2724 b->rangebs[i] = B->rmap->range[i]/bs; 2725 } 2726 b->rstartbs = B->rmap->rstart/bs; 2727 b->rendbs = B->rmap->rend/bs; 2728 b->cstartbs = B->cmap->rstart/bs; 2729 b->cendbs = B->cmap->rend/bs; 2730 2731 #if defined(PETSC_USE_CTABLE) 2732 PetscCall(PetscTableDestroy(&b->colmap)); 2733 #else 2734 PetscCall(PetscFree(b->colmap)); 2735 #endif 2736 PetscCall(PetscFree(b->garray)); 2737 PetscCall(VecDestroy(&b->lvec)); 2738 PetscCall(VecScatterDestroy(&b->Mvctx)); 2739 2740 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2741 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2742 PetscCall(MatDestroy(&b->B)); 2743 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2744 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2745 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2746 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2747 2748 if (!B->preallocated) { 2749 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2750 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2751 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2752 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2753 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2754 } 2755 2756 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2757 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2758 B->preallocated = PETSC_TRUE; 2759 B->was_assembled = PETSC_FALSE; 2760 B->assembled = PETSC_FALSE; 2761 PetscFunctionReturn(0); 2762 } 2763 2764 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2765 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2766 2767 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2768 { 2769 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2770 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2771 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2772 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2773 2774 PetscFunctionBegin; 2775 PetscCall(PetscMalloc1(M+1,&ii)); 2776 ii[0] = 0; 2777 for (i=0; i<M; i++) { 2778 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2779 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2780 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2781 /* remove one from count of matrix has diagonal */ 2782 for (j=id[i]; j<id[i+1]; j++) { 2783 if (jd[j] == i) {ii[i+1]--;break;} 2784 } 2785 } 2786 
PetscCall(PetscMalloc1(ii[M],&jj)); 2787 cnt = 0; 2788 for (i=0; i<M; i++) { 2789 for (j=io[i]; j<io[i+1]; j++) { 2790 if (garray[jo[j]] > rstart) break; 2791 jj[cnt++] = garray[jo[j]]; 2792 } 2793 for (k=id[i]; k<id[i+1]; k++) { 2794 if (jd[k] != i) { 2795 jj[cnt++] = rstart + jd[k]; 2796 } 2797 } 2798 for (; j<io[i+1]; j++) { 2799 jj[cnt++] = garray[jo[j]]; 2800 } 2801 } 2802 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2807 2808 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2809 2810 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2811 { 2812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2813 Mat_MPIAIJ *b; 2814 Mat B; 2815 2816 PetscFunctionBegin; 2817 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2818 2819 if (reuse == MAT_REUSE_MATRIX) { 2820 B = *newmat; 2821 } else { 2822 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2823 PetscCall(MatSetType(B,MATMPIAIJ)); 2824 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2825 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2826 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2827 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2828 } 2829 b = (Mat_MPIAIJ*) B->data; 2830 2831 if (reuse == MAT_REUSE_MATRIX) { 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2834 } else { 2835 PetscCall(MatDestroy(&b->A)); 2836 PetscCall(MatDestroy(&b->B)); 2837 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2838 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2839 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2840 
PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2841 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2842 } 2843 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2844 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2845 2846 if (reuse == MAT_INPLACE_MATRIX) { 2847 PetscCall(MatHeaderReplace(A,&B)); 2848 } else { 2849 *newmat = B; 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853 2854 /*MC 2855 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2856 2857 Options Database Keys: 2858 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2859 . -mat_block_size <bs> - set the blocksize used to store the matrix 2860 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2861 - -mat_use_hash_table <fact> - set hash table factor 2862 2863 Level: beginner 2864 2865 Notes: 2866 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 2867 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2868 2869 .seealso: `MatCreateBAIJ` 2870 M*/ 2871 2872 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2873 2874 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2875 { 2876 Mat_MPIBAIJ *b; 2877 PetscBool flg = PETSC_FALSE; 2878 2879 PetscFunctionBegin; 2880 PetscCall(PetscNewLog(B,&b)); 2881 B->data = (void*)b; 2882 2883 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2884 B->assembled = PETSC_FALSE; 2885 2886 B->insertmode = NOT_SET_VALUES; 2887 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2888 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2889 2890 /* build local table of row and column ownerships */ 2891 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2892 2893 /* build cache for off array entries formed */ 2894 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2895 2896 b->donotstash = PETSC_FALSE; 2897 b->colmap = NULL; 2898 b->garray = NULL; 2899 b->roworiented = PETSC_TRUE; 2900 2901 /* stuff used in block assembly */ 2902 b->barray = NULL; 2903 2904 /* stuff used for matrix vector multiply */ 2905 b->lvec = NULL; 2906 b->Mvctx = NULL; 2907 2908 /* stuff for MatGetRow() */ 2909 b->rowindices = NULL; 2910 b->rowvalues = NULL; 2911 b->getrowactive = PETSC_FALSE; 2912 2913 /* hash table stuff */ 2914 b->ht = NULL; 2915 b->hd = NULL; 2916 b->ht_size = 0; 2917 b->ht_flag = PETSC_FALSE; 2918 b->ht_fact = 0; 2919 b->ht_total_ct = 0; 2920 b->ht_insert_ct = 0; 2921 2922 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2923 b->ijonly = PETSC_FALSE; 2924 2925 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2927 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2928 #if defined(PETSC_HAVE_HYPRE) 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2930 #endif 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2938 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2939 2940 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2941 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2942 if (flg) { 2943 PetscReal fact = 1.39; 2944 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2945 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2946 if (fact <= 1.0) fact = 1.39; 2947 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2948 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2949 } 2950 PetscOptionsEnd(); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 /*MC 2955 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 
2956 2957 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2958 and MATMPIBAIJ otherwise. 2959 2960 Options Database Keys: 2961 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2962 2963 Level: beginner 2964 2965 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2966 M*/ 2967 2968 /*@C 2969 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2970 (block compressed row). For good matrix assembly performance 2971 the user should preallocate the matrix storage by setting the parameters 2972 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2973 performance can be increased by more than a factor of 50. 2974 2975 Collective on Mat 2976 2977 Input Parameters: 2978 + B - the matrix 2979 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2980 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2981 . d_nz - number of block nonzeros per block row in diagonal portion of local 2982 submatrix (same for all local rows) 2983 . d_nnz - array containing the number of block nonzeros in the various block rows 2984 of the in diagonal portion of the local (possibly different for each block 2985 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2986 set it even if it is zero. 2987 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2988 submatrix (same for all local rows). 2989 - o_nnz - array containing the number of nonzeros in the various block rows of the 2990 off-diagonal portion of the local submatrix (possibly different for 2991 each block row) or NULL. 
2992 2993 If the *_nnz parameter is given then the *_nz parameter is ignored 2994 2995 Options Database Keys: 2996 + -mat_block_size - size of the blocks to use 2997 - -mat_use_hash_table <fact> - set hash table factor 2998 2999 Notes: 3000 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3001 than it must be used on all processors that share the object for that argument. 3002 3003 Storage Information: 3004 For a square global matrix we define each processor's diagonal portion 3005 to be its local rows and the corresponding columns (a square submatrix); 3006 each processor's off-diagonal portion encompasses the remainder of the 3007 local matrix (a rectangular submatrix). 3008 3009 The user can specify preallocated storage for the diagonal part of 3010 the local submatrix with either d_nz or d_nnz (not both). Set 3011 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3012 memory allocation. Likewise, specify preallocated storage for the 3013 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3014 3015 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3016 the figure below we depict these three local rows and all columns (0-11). 3017 3018 .vb 3019 0 1 2 3 4 5 6 7 8 9 10 11 3020 -------------------------- 3021 row 3 |o o o d d d o o o o o o 3022 row 4 |o o o d d d o o o o o o 3023 row 5 |o o o d d d o o o o o o 3024 -------------------------- 3025 .ve 3026 3027 Thus, any entries in the d locations are stored in the d (diagonal) 3028 submatrix, and any entries in the o locations are stored in the 3029 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3030 stored simply in the MATSEQBAIJ format for compressed row storage. 3031 3032 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3033 and o_nz should indicate the number of block nonzeros per row in the o matrix. 
3034 In general, for PDE problems in which most nonzeros are near the diagonal, 3035 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3036 or you will get TERRIBLE performance; see the users' manual chapter on 3037 matrices. 3038 3039 You can call MatGetInfo() to get information on how effective the preallocation was; 3040 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3041 You can also run with the option -info and look for messages with the string 3042 malloc in them to see if additional memory allocation was needed. 3043 3044 Level: intermediate 3045 3046 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3047 @*/ 3048 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3049 { 3050 PetscFunctionBegin; 3051 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3052 PetscValidType(B,1); 3053 PetscValidLogicalCollectiveInt(B,bs,2); 3054 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 /*@C 3059 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3060 (block compressed row). For good matrix assembly performance 3061 the user should preallocate the matrix storage by setting the parameters 3062 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3063 performance can be increased by more than a factor of 50. 3064 3065 Collective 3066 3067 Input Parameters: 3068 + comm - MPI communicator 3069 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3070 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3071 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3072 This value should be the same as the local size used in creating the 3073 y vector for the matrix-vector product y = Ax. 3074 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3075 This value should be the same as the local size used in creating the 3076 x vector for the matrix-vector product y = Ax. 3077 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3078 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3079 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3080 submatrix (same for all local rows) 3081 . d_nnz - array containing the number of nonzero blocks in the various block rows 3082 of the in diagonal portion of the local (possibly different for each block 3083 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3084 and set it even if it is zero. 3085 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3086 submatrix (same for all local rows). 3087 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3088 off-diagonal portion of the local submatrix (possibly different for 3089 each block row) or NULL. 3090 3091 Output Parameter: 3092 . A - the matrix 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> - set hash table factor 3097 3098 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3099 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
3100 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3101 3102 Notes: 3103 If the *_nnz parameter is given then the *_nz parameter is ignored 3104 3105 A nonzero block is any block that as 1 or more nonzeros in it 3106 3107 The user MUST specify either the local or global matrix dimensions 3108 (possibly both). 3109 3110 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3111 than it must be used on all processors that share the object for that argument. 3112 3113 Storage Information: 3114 For a square global matrix we define each processor's diagonal portion 3115 to be its local rows and the corresponding columns (a square submatrix); 3116 each processor's off-diagonal portion encompasses the remainder of the 3117 local matrix (a rectangular submatrix). 3118 3119 The user can specify preallocated storage for the diagonal part of 3120 the local submatrix with either d_nz or d_nnz (not both). Set 3121 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3122 memory allocation. Likewise, specify preallocated storage for the 3123 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3124 3125 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3126 the figure below we depict these three local rows and all columns (0-11). 3127 3128 .vb 3129 0 1 2 3 4 5 6 7 8 9 10 11 3130 -------------------------- 3131 row 3 |o o o d d d o o o o o o 3132 row 4 |o o o d d d o o o o o o 3133 row 5 |o o o d d d o o o o o o 3134 -------------------------- 3135 .ve 3136 3137 Thus, any entries in the d locations are stored in the d (diagonal) 3138 submatrix, and any entries in the o locations are stored in the 3139 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3140 stored simply in the MATSEQBAIJ format for compressed row storage. 
3141 3142 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3143 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3144 In general, for PDE problems in which most nonzeros are near the diagonal, 3145 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3146 or you will get TERRIBLE performance; see the users' manual chapter on 3147 matrices. 3148 3149 Level: intermediate 3150 3151 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3152 @*/ 3153 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3154 { 3155 PetscMPIInt size; 3156 3157 PetscFunctionBegin; 3158 PetscCall(MatCreate(comm,A)); 3159 PetscCall(MatSetSizes(*A,m,n,M,N)); 3160 PetscCallMPI(MPI_Comm_size(comm,&size)); 3161 if (size > 1) { 3162 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3163 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3164 } else { 3165 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3166 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3167 } 3168 PetscFunctionReturn(0); 3169 } 3170 3171 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3172 { 3173 Mat mat; 3174 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3175 PetscInt len=0; 3176 3177 PetscFunctionBegin; 3178 *newmat = NULL; 3179 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3180 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3181 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3182 3183 mat->factortype = matin->factortype; 3184 mat->preallocated = PETSC_TRUE; 3185 mat->assembled = PETSC_TRUE; 3186 mat->insertmode = NOT_SET_VALUES; 3187 3188 a = (Mat_MPIBAIJ*)mat->data; 3189 mat->rmap->bs = 
matin->rmap->bs; 3190 a->bs2 = oldmat->bs2; 3191 a->mbs = oldmat->mbs; 3192 a->nbs = oldmat->nbs; 3193 a->Mbs = oldmat->Mbs; 3194 a->Nbs = oldmat->Nbs; 3195 3196 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3197 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3198 3199 a->size = oldmat->size; 3200 a->rank = oldmat->rank; 3201 a->donotstash = oldmat->donotstash; 3202 a->roworiented = oldmat->roworiented; 3203 a->rowindices = NULL; 3204 a->rowvalues = NULL; 3205 a->getrowactive = PETSC_FALSE; 3206 a->barray = NULL; 3207 a->rstartbs = oldmat->rstartbs; 3208 a->rendbs = oldmat->rendbs; 3209 a->cstartbs = oldmat->cstartbs; 3210 a->cendbs = oldmat->cendbs; 3211 3212 /* hash table stuff */ 3213 a->ht = NULL; 3214 a->hd = NULL; 3215 a->ht_size = 0; 3216 a->ht_flag = oldmat->ht_flag; 3217 a->ht_fact = oldmat->ht_fact; 3218 a->ht_total_ct = 0; 3219 a->ht_insert_ct = 0; 3220 3221 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3222 if (oldmat->colmap) { 3223 #if defined(PETSC_USE_CTABLE) 3224 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3225 #else 3226 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3227 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3228 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3229 #endif 3230 } else a->colmap = NULL; 3231 3232 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3233 PetscCall(PetscMalloc1(len,&a->garray)); 3234 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3235 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3236 } else a->garray = NULL; 3237 3238 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3239 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3240 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3241 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3242 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3243 3244 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3245 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3246 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3247 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3248 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3249 *newmat = mat; 3250 PetscFunctionReturn(0); 3251 } 3252 3253 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3254 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3255 { 3256 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3257 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3258 PetscScalar *matvals; 3259 3260 PetscFunctionBegin; 3261 PetscCall(PetscViewerSetUp(viewer)); 3262 3263 /* read in matrix header */ 3264 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3265 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3266 M = header[1]; N = header[2]; nz = header[3]; 3267 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3268 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3269 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3270 3271 /* set block sizes from the viewer's .info file */ 3272 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3273 /* set local sizes if not set already */ 3274 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3275 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3276 /* set global sizes if not set already */ 3277 if (mat->rmap->N < 0) mat->rmap->N = M; 3278 if (mat->cmap->N < 0) mat->cmap->N 
= N; 3279 PetscCall(PetscLayoutSetUp(mat->rmap)); 3280 PetscCall(PetscLayoutSetUp(mat->cmap)); 3281 3282 /* check if the matrix sizes are correct */ 3283 PetscCall(MatGetSize(mat,&rows,&cols)); 3284 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3285 PetscCall(MatGetBlockSize(mat,&bs)); 3286 PetscCall(MatGetLocalSize(mat,&m,&n)); 3287 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3288 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3289 mbs = m/bs; nbs = n/bs; 3290 3291 /* read in row lengths and build row indices */ 3292 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3293 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3294 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3295 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3296 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3297 3298 /* read in column indices and matrix values */ 3299 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3300 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3301 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3302 3303 { /* preallocate matrix storage */ 3304 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3305 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3306 PetscBool sbaij,done; 3307 PetscInt *d_nnz,*o_nnz; 3308 3309 PetscCall(PetscBTCreate(nbs,&bt)); 3310 PetscCall(PetscHSetICreate(&ht)); 3311 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3312 
PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3313 for (i=0; i<mbs; i++) { 3314 PetscCall(PetscBTMemzero(nbs,bt)); 3315 PetscCall(PetscHSetIClear(ht)); 3316 for (k=0; k<bs; k++) { 3317 PetscInt row = bs*i + k; 3318 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3319 PetscInt col = colidxs[j]; 3320 if (!sbaij || col >= row) { 3321 if (col >= cs && col < ce) { 3322 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3323 } else { 3324 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3325 if (done) o_nnz[i]++; 3326 } 3327 } 3328 } 3329 } 3330 } 3331 PetscCall(PetscBTDestroy(&bt)); 3332 PetscCall(PetscHSetIDestroy(&ht)); 3333 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3334 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3335 PetscCall(PetscFree2(d_nnz,o_nnz)); 3336 } 3337 3338 /* store matrix values */ 3339 for (i=0; i<m; i++) { 3340 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3341 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3342 } 3343 3344 PetscCall(PetscFree(rowidxs)); 3345 PetscCall(PetscFree2(colidxs,matvals)); 3346 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3347 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3348 PetscFunctionReturn(0); 3349 } 3350 3351 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3352 { 3353 PetscBool isbinary; 3354 3355 PetscFunctionBegin; 3356 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3357 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3358 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /*@ 3363 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3364 3365 Input Parameters: 3366 + mat - the matrix 3367 - fact - factor 3368 3369 Not Collective, each process can use a different factor 3370 3371 Level: advanced 3372 3373 Notes: 3374 This can also be set by the command line option: -mat_use_hash_table <fact> 3375 3376 .seealso: `MatSetOption()` 3377 @*/ 3378 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3379 { 3380 PetscFunctionBegin; 3381 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3382 PetscFunctionReturn(0); 3383 } 3384 3385 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3386 { 3387 Mat_MPIBAIJ *baij; 3388 3389 PetscFunctionBegin; 3390 baij = (Mat_MPIBAIJ*)mat->data; 3391 baij->ht_fact = fact; 3392 PetscFunctionReturn(0); 3393 } 3394 3395 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3396 { 3397 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3398 PetscBool flg; 3399 3400 PetscFunctionBegin; 3401 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3402 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3403 if (Ad) *Ad = a->A; 3404 if (Ao) *Ao = a->B; 3405 if (colmap) *colmap = a->garray; 3406 PetscFunctionReturn(0); 3407 } 3408 3409 /* 3410 Special version for direct calls from Fortran (to eliminate two function call overheads 3411 */ 3412 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3413 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3414 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3415 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3416 #endif 3417 3418 /*@C 3419 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3420 3421 Collective on Mat 3422 3423 Input Parameters: 3424 + mat - the matrix 3425 . min - number of input rows 3426 . im - input rows 3427 . nin - number of input columns 3428 . in - input columns 3429 . 
v - numerical values input 3430 - addvin - INSERT_VALUES or ADD_VALUES 3431 3432 Notes: 3433 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3434 3435 Level: advanced 3436 3437 .seealso: `MatSetValuesBlocked()` 3438 @*/ 3439 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3440 { 3441 /* convert input arguments to C version */ 3442 Mat mat = *matin; 3443 PetscInt m = *min, n = *nin; 3444 InsertMode addv = *addvin; 3445 3446 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3447 const MatScalar *value; 3448 MatScalar *barray = baij->barray; 3449 PetscBool roworiented = baij->roworiented; 3450 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3451 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3452 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3453 3454 PetscFunctionBegin; 3455 /* tasks normally handled by MatSetValuesBlocked() */ 3456 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3457 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3458 PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3459 if (mat->assembled) { 3460 mat->was_assembled = PETSC_TRUE; 3461 mat->assembled = PETSC_FALSE; 3462 } 3463 PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0)); 3464 3465 if (!barray) { 3466 PetscCall(PetscMalloc1(bs2,&barray)); 3467 baij->barray = barray; 3468 } 3469 3470 if (roworiented) stepval = (n-1)*bs; 3471 else stepval = (m-1)*bs; 3472 3473 for (i=0; i<m; i++) { 3474 if (im[i] < 0) continue; 3475 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3476 if (im[i] >= rstart && im[i] < rend) { 3477 row = im[i] - rstart; 3478 for (j=0; j<n; j++) { 3479 /* If NumCol = 1 
then a copy is not required */ 3480 if ((roworiented) && (n == 1)) { 3481 barray = (MatScalar*)v + i*bs2; 3482 } else if ((!roworiented) && (m == 1)) { 3483 barray = (MatScalar*)v + j*bs2; 3484 } else { /* Here a copy is required */ 3485 if (roworiented) { 3486 value = v + i*(stepval+bs)*bs + j*bs; 3487 } else { 3488 value = v + j*(stepval+bs)*bs + i*bs; 3489 } 3490 for (ii=0; ii<bs; ii++,value+=stepval) { 3491 for (jj=0; jj<bs; jj++) { 3492 *barray++ = *value++; 3493 } 3494 } 3495 barray -=bs2; 3496 } 3497 3498 if (in[j] >= cstart && in[j] < cend) { 3499 col = in[j] - cstart; 3500 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 3501 } else if (in[j] < 0) continue; 3502 else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3503 else { 3504 if (mat->was_assembled) { 3505 if (!baij->colmap) { 3506 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3507 } 3508 3509 #if defined(PETSC_USE_DEBUG) 3510 #if defined(PETSC_USE_CTABLE) 3511 { PetscInt data; 3512 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 3513 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3514 } 3515 #else 3516 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3517 #endif 3518 #endif 3519 #if defined(PETSC_USE_CTABLE) 3520 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 3521 col = (col - 1)/bs; 3522 #else 3523 col = (baij->colmap[in[j]] - 1)/bs; 3524 #endif 3525 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3526 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3527 col = in[j]; 3528 } 3529 } else col = in[j]; 3530 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3531 } 3532 } 3533 } else { 3534 if (!baij->donotstash) { 3535 if (roworiented) { 3536 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 
3537 } else { 3538 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3539 } 3540 } 3541 } 3542 } 3543 3544 /* task normally handled by MatSetValuesBlocked() */ 3545 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3546 PetscFunctionReturn(0); 3547 } 3548 3549 /*@ 3550 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3551 CSR format the local rows. 3552 3553 Collective 3554 3555 Input Parameters: 3556 + comm - MPI communicator 3557 . bs - the block size, only a block size of 1 is supported 3558 . m - number of local rows (Cannot be PETSC_DECIDE) 3559 . n - This value should be the same as the local size used in creating the 3560 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3561 calculated if N is given) For square matrices n is almost always m. 3562 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3563 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3564 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3565 . j - column indices 3566 - a - matrix values 3567 3568 Output Parameter: 3569 . mat - the matrix 3570 3571 Level: intermediate 3572 3573 Notes: 3574 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3575 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3576 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3577 3578 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3579 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3580 block, followed by the second column of the first block etc etc. 
That is, the blocks are contiguous in memory 3581 with column-major ordering within blocks. 3582 3583 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3584 3585 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3586 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3587 @*/ 3588 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3589 { 3590 PetscFunctionBegin; 3591 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3592 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3593 PetscCall(MatCreate(comm,mat)); 3594 PetscCall(MatSetSizes(*mat,m,n,M,N)); 3595 PetscCall(MatSetType(*mat,MATMPIBAIJ)); 3596 PetscCall(MatSetBlockSize(*mat,bs)); 3597 PetscCall(MatSetUp(*mat)); 3598 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 3599 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 3600 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 3601 PetscFunctionReturn(0); 3602 } 3603 3604 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3605 { 3606 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3607 PetscInt *indx; 3608 PetscScalar *values; 3609 3610 PetscFunctionBegin; 3611 PetscCall(MatGetSize(inmat,&m,&N)); 3612 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3613 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3614 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3615 PetscInt *bindx,rmax=a->rmax,j; 3616 PetscMPIInt rank,size; 3617 3618 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3619 mbs = m/bs; Nbs = N/cbs; 3620 if (n == PETSC_DECIDE) { 3621 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 3622 } 3623 nbs = n/cbs; 3624 
3625 PetscCall(PetscMalloc1(rmax,&bindx)); 3626 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 3627 3628 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3629 PetscCallMPI(MPI_Comm_rank(comm,&size)); 3630 if (rank == size-1) { 3631 /* Check sum(nbs) = Nbs */ 3632 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3633 } 3634 3635 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3636 for (i=0; i<mbs; i++) { 3637 PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 3638 nnz = nnz/bs; 3639 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3640 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 3641 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 3642 } 3643 PetscCall(PetscFree(bindx)); 3644 3645 PetscCall(MatCreate(comm,outmat)); 3646 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 3647 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 3648 PetscCall(MatSetType(*outmat,MATBAIJ)); 3649 PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz)); 3650 PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 3651 MatPreallocateEnd(dnz,onz); 3652 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3653 } 3654 3655 /* numeric phase */ 3656 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3657 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 3658 3659 for (i=0; i<m; i++) { 3660 PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3661 Ii = i + rstart; 3662 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 3663 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3664 } 3665 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3666 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3667 PetscFunctionReturn(0); 3668 } 3669