1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 15 PetscScalar *va,*vv; 16 Vec vB,vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 21 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 22 23 PetscCall(VecGetArrayWrite(vA,&va)); 24 if (idx) { 25 for (i=0; i<m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 31 PetscCall(PetscMalloc1(m,&idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 33 34 PetscCall(VecGetArrayWrite(v,&vv)); 35 PetscCall(VecGetArrayRead(vB,&vb)); 36 for (i=0; i<m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 43 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 44 } 45 } 46 PetscCall(VecRestoreArrayWrite(vA,&vv)); 47 PetscCall(VecRestoreArrayWrite(vA,&va)); 48 PetscCall(VecRestoreArrayRead(vB,&vb)); 49 PetscCall(PetscFree(idxb)); 50 PetscCall(VecDestroy(&vA)); 51 PetscCall(VecDestroy(&vB)); 52 PetscFunctionReturn(0); 53 } 54 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 59 PetscFunctionBegin; 60 PetscCall(MatStoreValues(aij->A)); 61 PetscCall(MatStoreValues(aij->B)); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 68 69 PetscFunctionBegin; 70 PetscCall(MatRetrieveValues(aij->A)); 71 PetscCall(MatRetrieveValues(aij->B)); 72 PetscFunctionReturn(0); 73 } 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. This is done in a non scalable way since the 79 length of colmap equals the global matrix length. 80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 176 MatScalar value; 177 PetscBool roworiented = baij->roworiented; 178 PetscInt i,j,row,col; 179 PetscInt rstart_orig=mat->rmap->rstart; 180 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 181 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 182 183 /* Some Variables required in the macro */ 184 Mat A = baij->A; 185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 186 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 187 MatScalar *aa =a->a; 188 189 Mat B = baij->B; 190 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 191 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 192 MatScalar *ba =b->a; 193 194 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 195 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 196 MatScalar *ap,*bap; 197 198 PetscFunctionBegin; 199 for (i=0; i<m; i++) { 200 if (im[i] < 0) continue; 201 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 202 if (im[i] >= rstart_orig && im[i] < rend_orig) { 203 row = im[i] - rstart_orig; 204 for (j=0; j<n; j++) { 205 if (in[j] >= cstart_orig && in[j] < cend_orig) { 206 col = in[j] - cstart_orig; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 210 } else if (in[j] < 0) { 211 continue; 212 } else { 213 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 214 if (mat->was_assembled) { 215 if (!baij->colmap) { 216 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 217 } 218 #if defined(PETSC_USE_CTABLE) 219 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 220 col = col - 1; 221 #else 222 col = baij->colmap[in[j]/bs] - 1; 223 #endif 224 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 225 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 226 col = in[j]; 227 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 228 B = baij->B; 229 b = (Mat_SeqBAIJ*)(B)->data; 230 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 231 ba =b->a; 232 } else { 233 PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 234 col += in[j]%bs; 235 } 236 } else col = in[j]; 237 if (roworiented) value = v[i*n+j]; 238 else value = v[i+j*m]; 239 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 240 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 241 } 242 } 243 } else { 244 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 245 if (!baij->donotstash) { 246 mat->assembled = PETSC_FALSE; 247 if (roworiented) { 248 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 249 } else { 250 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 251 } 252 } 253 } 254 } 255 PetscFunctionReturn(0); 256 } 257 258 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 259 { 260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 261 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 262 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 263 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 264 PetscBool roworiented=a->roworiented; 265 const PetscScalar *value = v; 266 MatScalar *ap,*aa = a->a,*bap; 267 268 PetscFunctionBegin; 269 rp = aj + ai[row]; 270 ap = aa + bs2*ai[row]; 271 rmax = imax[row]; 272 nrow = ailen[row]; 273 value = v; 274 low = 0; 275 high = nrow; 276 while (high-low > 7) { 277 t = (low+high)/2; 278 if (rp[t] > col) high = t; 279 else low = t; 280 } 281 for (i=low; i<high; i++) { 282 if (rp[i] > col) break; 283 if (rp[i] == col) { 284 bap = ap + bs2*i; 285 if (roworiented) { 286 if (is == ADD_VALUES) { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=ii; jj<bs2; jj+=bs) { 289 bap[jj] += *value++; 290 } 291 } 292 } else { 293 for (ii=0; ii<bs; ii++) { 294 for (jj=ii; jj<bs2; jj+=bs) { 295 bap[jj] = *value++; 296 } 297 } 298 } 299 } else { 300 if (is == ADD_VALUES) { 301 for (ii=0; ii<bs; ii++,value+=bs) { 302 for (jj=0; jj<bs; jj++) { 303 bap[jj] += value[jj]; 304 } 305 bap += bs; 306 } 307 } else { 308 for (ii=0; ii<bs; ii++,value+=bs) { 309 for (jj=0; jj<bs; jj++) { 310 bap[jj] = value[jj]; 311 } 312 bap += bs; 313 } 314 } 315 } 316 goto noinsert2; 317 } 318 } 319 if (nonew == 1) goto noinsert2; 320 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 321 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 322 N = nrow++ - 1; high++; 323 /* shift up all the later entries in this row */ 324 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 325 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 326 rp[i] = col; 327 bap = ap + bs2*i; 328 if (roworiented) { 329 for (ii=0; ii<bs; ii++) { 330 for (jj=ii; jj<bs2; jj+=bs) { 331 bap[jj] = *value++; 332 } 333 } 334 } else { 335 for (ii=0; ii<bs; ii++) { 336 for (jj=0; jj<bs; jj++) { 337 *bap++ = *value++; 338 } 339 } 340 } 341 noinsert2:; 342 ailen[row] = nrow; 343 PetscFunctionReturn(0); 344 } 345 346 /* 347 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 348 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 349 */ 350 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 351 { 352 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 353 const PetscScalar *value; 354 MatScalar *barray = baij->barray; 355 PetscBool roworiented = baij->roworiented; 356 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 357 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 358 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 359 360 PetscFunctionBegin; 361 if (!barray) { 362 PetscCall(PetscMalloc1(bs2,&barray)); 363 baij->barray = barray; 364 } 365 366 if (roworiented) stepval = (n-1)*bs; 367 else stepval = (m-1)*bs; 368 369 for (i=0; i<m; i++) { 370 if (im[i] < 0) continue; 371 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 372 if (im[i] >= rstart && im[i] < rend) { 373 row = im[i] - rstart; 374 for (j=0; j<n; j++) { 375 /* If NumCol = 1 then a copy is not required */ 376 if ((roworiented) && (n == 1)) { 377 barray = (MatScalar*)v + i*bs2; 378 } else if ((!roworiented) && (m == 1)) { 379 barray = (MatScalar*)v + j*bs2; 380 } else { /* Here a copy is required */ 381 if (roworiented) { 382 value = v + (i*(stepval+bs) + j)*bs; 383 } else { 384 value = v + (j*(stepval+bs) + i)*bs; 385 } 386 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 387 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 388 barray += bs; 389 } 390 barray -= bs2; 391 } 392 393 if (in[j] >= cstart && in[j] < cend) { 394 col = in[j] - cstart; 395 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 396 } else if (in[j] < 0) { 397 continue; 398 } else { 399 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 400 if (mat->was_assembled) { 401 if (!baij->colmap) { 402 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 403 } 404 405 #if defined(PETSC_USE_DEBUG) 406 #if defined(PETSC_USE_CTABLE) 407 { PetscInt data; 408 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 409 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 410 } 411 #else 412 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 #endif 414 #endif 415 #if defined(PETSC_USE_CTABLE) 416 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 417 col = (col - 1)/bs; 418 #else 419 col = (baij->colmap[in[j]] - 1)/bs; 420 #endif 421 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 422 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 423 col = in[j]; 424 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 425 } else col = in[j]; 426 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 427 } 428 } 429 } else { 430 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 431 if (!baij->donotstash) { 432 if (roworiented) { 433 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 434 } else { 435 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 436 } 437 } 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442 443 #define HASH_KEY 0.6180339887 444 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 445 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 446 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 447 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 448 { 449 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 450 PetscBool roworiented = baij->roworiented; 451 PetscInt i,j,row,col; 452 PetscInt rstart_orig=mat->rmap->rstart; 453 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 454 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 455 PetscReal tmp; 456 MatScalar **HD = baij->hd,value; 457 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 458 459 PetscFunctionBegin; 460 for (i=0; i<m; i++) { 461 if (PetscDefined(USE_DEBUG)) { 462 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 463 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 464 } 465 row = im[i]; 466 if (row >= rstart_orig && row < rend_orig) { 467 for (j=0; j<n; j++) { 468 col = in[j]; 469 if (roworiented) value = v[i*n+j]; 470 else value = v[i+j*m]; 471 /* Look up PetscInto the Hash Table */ 472 key = (row/bs)*Nbs+(col/bs)+1; 473 h1 = HASH(size,key,tmp); 474 475 idx = h1; 476 if (PetscDefined(USE_DEBUG)) { 477 insert_ct++; 478 total_ct++; 479 if (HT[idx] != key) { 480 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 481 if (idx == size) { 482 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 483 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 484 } 485 } 486 } else if (HT[idx] != key) { 487 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 488 if (idx == size) { 489 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 490 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 491 } 492 } 493 /* A HASH table entry is found, so insert the values at the correct address */ 494 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 495 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 496 } 497 } else if (!baij->donotstash) { 498 if (roworiented) { 499 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 500 } else { 501 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 502 } 503 } 504 } 505 if (PetscDefined(USE_DEBUG)) { 506 baij->ht_total_ct += total_ct; 507 baij->ht_insert_ct += insert_ct; 508 } 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 513 { 514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 515 PetscBool roworiented = baij->roworiented; 516 PetscInt i,j,ii,jj,row,col; 517 PetscInt rstart=baij->rstartbs; 518 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 519 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 520 PetscReal tmp; 521 MatScalar **HD = baij->hd,*baij_a; 522 const PetscScalar *v_t,*value; 523 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 524 525 PetscFunctionBegin; 526 if (roworiented) stepval = (n-1)*bs; 527 else stepval = (m-1)*bs; 528 529 for (i=0; i<m; i++) { 530 if (PetscDefined(USE_DEBUG)) { 531 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 532 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 533 } 534 row = im[i]; 535 v_t = v + i*nbs2; 536 if (row >= rstart && row < rend) { 537 for (j=0; j<n; j++) { 538 col = in[j]; 539 540 /* Look up into the Hash Table */ 541 key = row*Nbs+col+1; 542 h1 = HASH(size,key,tmp); 543 544 idx = h1; 545 if (PetscDefined(USE_DEBUG)) { 546 total_ct++; 547 insert_ct++; 548 if (HT[idx] != key) { 549 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 550 if (idx == size) { 551 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 552 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 553 } 554 } 555 } else if (HT[idx] != key) { 556 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 557 if (idx == size) { 558 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 559 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 560 } 561 } 562 baij_a = HD[idx]; 563 if (roworiented) { 564 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 565 /* value = v + (i*(stepval+bs)+j)*bs; */ 566 value = v_t; 567 v_t += bs; 568 if (addv == ADD_VALUES) { 569 for (ii=0; ii<bs; ii++,value+=stepval) { 570 for (jj=ii; jj<bs2; jj+=bs) { 571 baij_a[jj] += *value++; 572 } 573 } 574 } else { 575 for (ii=0; ii<bs; ii++,value+=stepval) { 576 for (jj=ii; jj<bs2; jj+=bs) { 577 baij_a[jj] = *value++; 578 } 579 } 580 } 581 } else { 582 value = v + j*(stepval+bs)*bs + i*bs; 583 if (addv == ADD_VALUES) { 584 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 585 for (jj=0; jj<bs; jj++) { 586 baij_a[jj] += *value++; 587 } 588 } 589 } else { 590 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 591 for (jj=0; jj<bs; jj++) { 592 baij_a[jj] = *value++; 593 } 594 } 595 } 596 } 597 } 598 } else { 599 if (!baij->donotstash) { 600 if (roworiented) { 601 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 602 } else { 603 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 604 } 605 } 606 } 607 } 608 if (PetscDefined(USE_DEBUG)) { 609 baij->ht_total_ct += total_ct; 610 baij->ht_insert_ct += insert_ct; 611 } 612 PetscFunctionReturn(0); 613 } 614 615 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* negative row */ 624 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* negative column */ 629 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 633 } else { 634 if (!baij->colmap) { 635 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 660 PetscReal sum = 0.0; 661 MatScalar *v; 662 663 PetscFunctionBegin; 664 if (baij->size == 1) { 665 PetscCall(MatNorm(baij->A,type,nrm)); 666 } else { 667 if (type == NORM_FROBENIUS) { 668 v = amat->a; 669 nz = amat->nz*bs2; 670 for (i=0; i<nz; i++) { 671 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 672 } 673 v = bmat->a; 674 nz = bmat->nz*bs2; 675 for (i=0; i<nz; i++) { 676 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 677 } 678 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *nrm = PetscSqrtReal(*nrm); 680 } else if (type == NORM_1) { /* max column sum */ 681 PetscReal *tmp,*tmp2; 682 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 683 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 684 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 685 v = amat->a; jj = amat->j; 686 for (i=0; i<amat->nz; i++) { 687 for (j=0; j<bs; j++) { 688 col = bs*(cstart + *jj) + j; /* column index */ 689 for (row=0; row<bs; row++) { 690 tmp[col] += PetscAbsScalar(*v); v++; 691 } 692 } 693 jj++; 694 } 695 v = bmat->a; jj = bmat->j; 696 for (i=0; i<bmat->nz; i++) { 697 for (j=0; j<bs; j++) { 698 col = bs*garray[*jj] + j; 699 for (row=0; row<bs; row++) { 700 tmp[col] += PetscAbsScalar(*v); v++; 701 } 702 } 703 jj++; 704 } 705 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 706 *nrm = 0.0; 707 for (j=0; j<mat->cmap->N; j++) { 708 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 709 } 710 PetscCall(PetscFree(tmp)); 711 PetscCall(PetscFree(tmp2)); 712 } else if (type == NORM_INFINITY) { /* max row sum */ 713 PetscReal *sums; 714 PetscCall(PetscMalloc1(bs,&sums)); 715 sum = 0.0; 716 for (j=0; j<amat->mbs; j++) { 717 for (row=0; row<bs; row++) sums[row] = 0.0; 718 v = amat->a + bs2*amat->i[j]; 719 nz = amat->i[j+1]-amat->i[j]; 720 for (i=0; i<nz; i++) { 721 for (col=0; col<bs; col++) { 722 for (row=0; row<bs; row++) { 723 sums[row] += PetscAbsScalar(*v); v++; 724 } 725 } 726 } 727 v = bmat->a + bs2*bmat->i[j]; 728 nz = bmat->i[j+1]-bmat->i[j]; 729 for (i=0; i<nz; i++) { 730 for (col=0; col<bs; col++) { 731 for (row=0; row<bs; row++) { 732 sums[row] += PetscAbsScalar(*v); v++; 733 } 734 } 735 } 736 for (row=0; row<bs; row++) { 737 if (sums[row] > sum) sum = sums[row]; 738 } 739 } 740 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 741 PetscCall(PetscFree(sums)); 742 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 Creates the hash table, and sets the table 749 This table is created only once. 750 If new entried need to be added to the matrix 751 then the hash table has to be destroyed and 752 recreated. 753 */ 754 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 755 { 756 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 757 Mat A = baij->A,B=baij->B; 758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 759 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 760 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 761 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 762 PetscInt *HT,key; 763 MatScalar **HD; 764 PetscReal tmp; 765 #if defined(PETSC_USE_INFO) 766 PetscInt ct=0,max=0; 767 #endif 768 769 PetscFunctionBegin; 770 if (baij->ht) PetscFunctionReturn(0); 771 772 baij->ht_size = (PetscInt)(factor*nz); 773 ht_size = baij->ht_size; 774 775 /* Allocate Memory for Hash Table */ 776 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 777 HD = baij->hd; 778 HT = baij->ht; 779 780 /* Loop Over A */ 781 for (i=0; i<a->mbs; i++) { 782 for (j=ai[i]; j<ai[i+1]; j++) { 783 row = i+rstart; 784 col = aj[j]+cstart; 785 786 key = row*Nbs + col + 1; 787 h1 = HASH(ht_size,key,tmp); 788 for (k=0; k<ht_size; k++) { 789 if (!HT[(h1+k)%ht_size]) { 790 HT[(h1+k)%ht_size] = key; 791 HD[(h1+k)%ht_size] = a->a + j*bs2; 792 break; 793 #if defined(PETSC_USE_INFO) 794 } else { 795 ct++; 796 #endif 797 } 798 } 799 #if defined(PETSC_USE_INFO) 800 if (k> max) max = k; 801 #endif 802 } 803 } 804 /* Loop Over B */ 805 for (i=0; i<b->mbs; i++) { 806 for (j=bi[i]; j<bi[i+1]; j++) { 807 row = i+rstart; 808 col = garray[bj[j]]; 809 key = row*Nbs + col + 1; 810 h1 = HASH(ht_size,key,tmp); 811 for (k=0; k<ht_size; k++) { 812 if (!HT[(h1+k)%ht_size]) { 813 HT[(h1+k)%ht_size] = key; 814 HD[(h1+k)%ht_size] = b->a + j*bs2; 815 break; 816 #if defined(PETSC_USE_INFO) 817 } else { 818 ct++; 819 #endif 820 } 821 } 822 #if defined(PETSC_USE_INFO) 823 if (k> max) max = k; 824 #endif 825 } 826 } 827 828 /* Print Summary */ 829 #if defined(PETSC_USE_INFO) 830 for (i=0,j=0; i<ht_size; i++) { 831 if (HT[i]) j++; 832 } 833 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 834 #endif 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 839 { 840 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 841 PetscInt nstash,reallocs; 842 843 PetscFunctionBegin; 844 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 845 846 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 847 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 848 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 849 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 850 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 851 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 856 { 857 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 858 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 859 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 860 PetscInt *row,*col; 861 PetscBool r1,r2,r3,other_disassembled; 862 MatScalar *val; 863 PetscMPIInt n; 864 865 PetscFunctionBegin; 866 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { 875 if (row[j] != rstart) break; 876 } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 /* Now assemble all these values with a single function call */ 880 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 881 i = j; 882 } 883 } 884 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 885 /* Now process the block-stash. Since the values are stashed column-oriented, 886 set the roworiented flag to column oriented, and after MatSetValues() 887 restore the original flags */ 888 r1 = baij->roworiented; 889 r2 = a->roworiented; 890 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 891 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 895 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 896 while (1) { 897 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 898 if (!flg) break; 899 900 for (i=0; i<n;) { 901 /* Now identify the consecutive vals belonging to the same row */ 902 for (j=i,rstart=row[j]; j<n; j++) { 903 if (row[j] != rstart) break; 904 } 905 if (j < n) ncols = j-i; 906 else ncols = n-i; 907 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 908 i = j; 909 } 910 } 911 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 912 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 PetscCall(MatAssemblyBegin(baij->A,mode)); 920 PetscCall(MatAssemblyEnd(baij->A,mode)); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselves, in order that we may reassemble. */ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 930 if (mat->was_assembled && !other_disassembled) { 931 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 937 } 938 PetscCall(MatAssemblyBegin(baij->B,mode)); 939 PetscCall(MatAssemblyEnd(baij->B,mode)); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 944 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 951 952 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 953 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 954 } 955 956 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 957 958 baij->rowvalues = NULL; 959 960 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 961 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 962 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 963 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 969 #include <petscdraw.h> 970 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 973 PetscMPIInt rank = baij->rank; 974 PetscInt bs = mat->rmap->bs; 975 PetscBool iascii,isdraw; 976 PetscViewer sviewer; 977 PetscViewerFormat format; 978 979 PetscFunctionBegin; 980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 981 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 982 if (iascii) { 983 PetscCall(PetscViewerGetFormat(viewer,&format)); 984 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 985 MatInfo info; 986 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 987 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 989 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 990 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 991 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 992 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 993 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 994 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 995 PetscCall(PetscViewerFlush(viewer)); 996 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 997 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 998 PetscCall(VecScatterView(baij->Mvctx,viewer)); 999 PetscFunctionReturn(0); 1000 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1001 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1002 PetscFunctionReturn(0); 1003 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1004 PetscFunctionReturn(0); 1005 } 1006 } 1007 1008 if (isdraw) { 1009 PetscDraw draw; 1010 PetscBool isnull; 1011 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1012 PetscCall(PetscDrawIsNull(draw,&isnull)); 1013 if (isnull) PetscFunctionReturn(0); 1014 } 1015 1016 { 1017 /* assemble the entire matrix onto first processor. */ 1018 Mat A; 1019 Mat_SeqBAIJ *Aloc; 1020 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1021 MatScalar *a; 1022 const char *matname; 1023 1024 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1025 /* Perhaps this should be the type of mat? */ 1026 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1027 if (rank == 0) { 1028 PetscCall(MatSetSizes(A,M,N,M,N)); 1029 } else { 1030 PetscCall(MatSetSizes(A,0,0,M,N)); 1031 } 1032 PetscCall(MatSetType(A,MATMPIBAIJ)); 1033 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1034 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1035 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1036 1037 /* copy over the A part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 PetscCall(PetscMalloc1(bs,&rvals)); 1041 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = (baij->cstartbs+aj[j])*bs; 1047 for (k=0; k<bs; k++) { 1048 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 /* copy over the B part */ 1054 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1055 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1056 for (i=0; i<mbs; i++) { 1057 rvals[0] = bs*(baij->rstartbs + i); 1058 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1059 for (j=ai[i]; j<ai[i+1]; j++) { 1060 col = baij->garray[aj[j]]*bs; 1061 for (k=0; k<bs; k++) { 1062 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1063 col++; a += bs; 1064 } 1065 } 1066 } 1067 PetscCall(PetscFree(rvals)); 1068 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1069 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1070 /* 1071 Everyone has to call to draw the matrix since the graphics waits are 1072 synchronized across all processors that share the PetscDraw object 1073 */ 1074 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1075 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1076 if (rank == 0) { 1077 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1078 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1079 } 1080 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1081 PetscCall(PetscViewerFlush(viewer)); 1082 PetscCall(MatDestroy(&A)); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1088 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1089 { 1090 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1091 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1092 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1093 const PetscInt *garray = aij->garray; 1094 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1095 PetscInt *rowlens,*colidxs; 1096 PetscScalar *matvals; 1097 1098 PetscFunctionBegin; 1099 PetscCall(PetscViewerSetUp(viewer)); 1100 1101 M = mat->rmap->N; 1102 N = mat->cmap->N; 1103 m = mat->rmap->n; 1104 rs = mat->rmap->rstart; 1105 cs = mat->cmap->rstart; 1106 bs = mat->rmap->bs; 1107 nz = bs*bs*(A->nz + B->nz); 1108 1109 /* write matrix header */ 1110 header[0] = MAT_FILE_CLASSID; 1111 header[1] = M; header[2] = N; header[3] = nz; 1112 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1113 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1114 1115 /* fill in and store row lengths */ 1116 PetscCall(PetscMalloc1(m,&rowlens)); 1117 for (cnt=0, i=0; i<A->mbs; i++) 1118 for (j=0; j<bs; j++) 1119 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1120 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1121 PetscCall(PetscFree(rowlens)); 1122 1123 /* fill in and store column indices */ 1124 PetscCall(PetscMalloc1(nz,&colidxs)); 1125 for (cnt=0, i=0; i<A->mbs; i++) { 1126 for (k=0; k<bs; k++) { 1127 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1128 if (garray[B->j[jb]] > cs/bs) break; 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1131 } 1132 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1133 for (l=0; l<bs; l++) 1134 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1135 for (; jb<B->i[i+1]; jb++) 1136 for (l=0; l<bs; l++) 1137 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1138 } 1139 } 1140 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1141 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1142 PetscCall(PetscFree(colidxs)); 1143 1144 /* fill in and store nonzero values */ 1145 PetscCall(PetscMalloc1(nz,&matvals)); 1146 for (cnt=0, i=0; i<A->mbs; i++) { 1147 for (k=0; k<bs; k++) { 1148 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1149 if (garray[B->j[jb]] > cs/bs) break; 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1152 } 1153 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1154 for (l=0; l<bs; l++) 1155 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1156 for (; jb<B->i[i+1]; jb++) 1157 for (l=0; l<bs; l++) 1158 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1159 } 1160 } 1161 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1162 PetscCall(PetscFree(matvals)); 1163 1164 /* write block size option to the viewer's .info file */ 1165 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscBool iascii,isdraw,issocket,isbinary; 1172 1173 PetscFunctionBegin; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1175 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1176 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1177 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1178 if (iascii || isdraw || issocket) { 1179 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1180 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1185 { 1186 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1187 1188 PetscFunctionBegin; 1189 #if defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1191 #endif 1192 PetscCall(MatStashDestroy_Private(&mat->stash)); 1193 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1194 PetscCall(MatDestroy(&baij->A)); 1195 PetscCall(MatDestroy(&baij->B)); 1196 #if defined(PETSC_USE_CTABLE) 1197 PetscCall(PetscTableDestroy(&baij->colmap)); 1198 #else 1199 PetscCall(PetscFree(baij->colmap)); 1200 #endif 1201 PetscCall(PetscFree(baij->garray)); 1202 PetscCall(VecDestroy(&baij->lvec)); 1203 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1204 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1205 PetscCall(PetscFree(baij->barray)); 1206 PetscCall(PetscFree2(baij->hd,baij->ht)); 1207 PetscCall(PetscFree(baij->rangebs)); 1208 PetscCall(PetscFree(mat->data)); 1209 1210 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1217 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL)); 1219 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL)); 1220 #if defined(PETSC_HAVE_HYPRE) 1221 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1222 #endif 1223 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx,&nt)); 1234 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy,&nt)); 1236 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1263 /* do local part */ 1264 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1294 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1301 1302 PetscFunctionBegin; 1303 PetscCall(MatScale(a->A,aa)); 1304 PetscCall(MatScale(a->B,aa)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1313 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1314 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1315 1316 PetscFunctionBegin; 1317 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1318 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1326 PetscInt max = 1,mbs = mat->mbs,tmp; 1327 for (i=0; i<mbs; i++) { 1328 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1329 if (max < tmp) max = tmp; 1330 } 1331 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1332 } 1333 lrow = row - brstart; 1334 1335 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1336 if (!v) {pvA = NULL; pvB = NULL;} 1337 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1338 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1339 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1340 nztot = nzA + nzB; 1341 1342 cmap = mat->garray; 1343 if (v || idx) { 1344 if (nztot) { 1345 /* Sort by increasing column numbers, assuming A and B already sorted */ 1346 PetscInt imark = -1; 1347 if (v) { 1348 *v = v_p = mat->rowvalues; 1349 for (i=0; i<nzB; i++) { 1350 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1351 else break; 1352 } 1353 imark = i; 1354 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1355 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1356 } 1357 if (idx) { 1358 *idx = idx_p = mat->rowindices; 1359 if (imark > -1) { 1360 for (i=0; i<imark; i++) { 1361 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1362 } 1363 } else { 1364 for (i=0; i<nzB; i++) { 1365 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1366 else break; 1367 } 1368 imark = i; 1369 } 1370 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1371 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1372 } 1373 } else { 1374 if (idx) *idx = NULL; 1375 if (v) *v = NULL; 1376 } 1377 } 1378 *nz = nztot; 1379 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1380 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1387 1388 PetscFunctionBegin; 1389 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1390 baij->getrowactive = PETSC_FALSE; 1391 PetscFunctionReturn(0); 1392 } 1393 1394 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatZeroEntries(l->A)); 1400 PetscCall(MatZeroEntries(l->B)); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1407 Mat A = a->A,B = a->B; 1408 PetscLogDouble isend[5],irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (PetscReal)matin->rmap->bs; 1412 1413 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1414 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1416 isend[3] = info->memory; isend[4] = info->mallocs; 1417 1418 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1419 1420 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1421 isend[3] += info->memory; isend[4] += info->mallocs; 1422 1423 if (flag == MAT_LOCAL) { 1424 info->nz_used = isend[0]; 1425 info->nz_allocated = isend[1]; 1426 info->nz_unneeded = isend[2]; 1427 info->memory = isend[3]; 1428 info->mallocs = isend[4]; 1429 } else if (flag == MAT_GLOBAL_MAX) { 1430 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1431 1432 info->nz_used = irecv[0]; 1433 info->nz_allocated = irecv[1]; 1434 info->nz_unneeded = irecv[2]; 1435 info->memory = irecv[3]; 1436 info->mallocs = irecv[4]; 1437 } else if (flag == MAT_GLOBAL_SUM) { 1438 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1446 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1447 info->fill_ratio_needed = 0; 1448 info->factor_mallocs = 0; 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1453 { 1454 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1455 1456 PetscFunctionBegin; 1457 switch (op) { 1458 case MAT_NEW_NONZERO_LOCATIONS: 1459 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1460 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1461 case MAT_KEEP_NONZERO_PATTERN: 1462 case MAT_NEW_NONZERO_LOCATION_ERR: 1463 MatCheckPreallocated(A,1); 1464 PetscCall(MatSetOption(a->A,op,flg)); 1465 PetscCall(MatSetOption(a->B,op,flg)); 1466 break; 1467 case MAT_ROW_ORIENTED: 1468 MatCheckPreallocated(A,1); 1469 a->roworiented = flg; 1470 1471 PetscCall(MatSetOption(a->A,op,flg)); 1472 PetscCall(MatSetOption(a->B,op,flg)); 1473 break; 1474 case MAT_FORCE_DIAGONAL_ENTRIES: 1475 case MAT_SORTED_FULL: 1476 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1477 break; 1478 case MAT_IGNORE_OFF_PROC_ENTRIES: 1479 a->donotstash = flg; 1480 break; 1481 case MAT_USE_HASH_TABLE: 1482 a->ht_flag = flg; 1483 a->ht_fact = 1.39; 1484 break; 1485 case MAT_SYMMETRIC: 1486 case MAT_STRUCTURALLY_SYMMETRIC: 1487 case MAT_HERMITIAN: 1488 case MAT_SUBMAT_SINGLEIS: 1489 case MAT_SYMMETRY_ETERNAL: 1490 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1491 case MAT_SPD_ETERNAL: 1492 /* if the diagonal matrix is square it inherits some of the properties above */ 1493 break; 1494 default: 1495 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1501 { 1502 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1503 Mat_SeqBAIJ *Aloc; 1504 Mat B; 1505 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1506 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1507 MatScalar *a; 1508 1509 PetscFunctionBegin; 1510 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1511 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1512 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1513 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1514 /* Do not know preallocation information, but must set block size */ 1515 PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL)); 1516 } else { 1517 B = *matout; 1518 } 1519 1520 /* copy over the A part */ 1521 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1522 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1523 PetscCall(PetscMalloc1(bs,&rvals)); 1524 1525 for (i=0; i<mbs; i++) { 1526 rvals[0] = bs*(baij->rstartbs + i); 1527 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1528 for (j=ai[i]; j<ai[i+1]; j++) { 1529 col = (baij->cstartbs+aj[j])*bs; 1530 for (k=0; k<bs; k++) { 1531 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1532 1533 col++; a += bs; 1534 } 1535 } 1536 } 1537 /* copy over the B part */ 1538 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1539 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1540 for (i=0; i<mbs; i++) { 1541 rvals[0] = bs*(baij->rstartbs + i); 1542 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1543 for (j=ai[i]; j<ai[i+1]; j++) { 1544 col = baij->garray[aj[j]]*bs; 1545 for (k=0; k<bs; k++) { 1546 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1547 col++; 1548 a += bs; 1549 } 1550 } 1551 } 1552 PetscCall(PetscFree(rvals)); 1553 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1554 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1555 1556 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1557 else { 1558 PetscCall(MatHeaderMerge(A,&B)); 1559 } 1560 PetscFunctionReturn(0); 1561 } 1562 1563 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1564 { 1565 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1566 Mat a = baij->A,b = baij->B; 1567 PetscInt s1,s2,s3; 1568 1569 PetscFunctionBegin; 1570 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1571 if (rr) { 1572 PetscCall(VecGetLocalSize(rr,&s1)); 1573 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1574 /* Overlap communication with computation. */ 1575 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1576 } 1577 if (ll) { 1578 PetscCall(VecGetLocalSize(ll,&s1)); 1579 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1580 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1581 } 1582 /* scale the diagonal block */ 1583 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1584 1585 if (rr) { 1586 /* Do a scatter end and then right scale the off-diagonal block */ 1587 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1588 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1594 { 1595 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1596 PetscInt *lrows; 1597 PetscInt r, len; 1598 PetscBool cong; 1599 1600 PetscFunctionBegin; 1601 /* get locally owned rows */ 1602 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 1603 /* fix right hand side if needed */ 1604 if (x && b) { 1605 const PetscScalar *xx; 1606 PetscScalar *bb; 1607 1608 PetscCall(VecGetArrayRead(x,&xx)); 1609 PetscCall(VecGetArray(b,&bb)); 1610 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1611 PetscCall(VecRestoreArrayRead(x,&xx)); 1612 PetscCall(VecRestoreArray(b,&bb)); 1613 } 1614 1615 /* actually zap the local rows */ 1616 /* 1617 Zero the required rows. If the "diagonal block" of the matrix 1618 is square and the user wishes to set the diagonal we use separate 1619 code so that MatSetValues() is not called for each diagonal allocating 1620 new memory, thus calling lots of mallocs and slowing things down. 1621 1622 */ 1623 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1624 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1625 PetscCall(MatHasCongruentLayouts(A,&cong)); 1626 if ((diag != 0.0) && cong) { 1627 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1628 } else if (diag != 0.0) { 1629 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1630 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1631 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1632 for (r = 0; r < len; ++r) { 1633 const PetscInt row = lrows[r] + A->rmap->rstart; 1634 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1635 } 1636 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1637 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1638 } else { 1639 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1640 } 1641 PetscCall(PetscFree(lrows)); 1642 1643 /* only change matrix nonzero state if pattern was allowed to be changed */ 1644 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1645 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1646 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1652 { 1653 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1654 PetscMPIInt n = A->rmap->n,p = 0; 1655 PetscInt i,j,k,r,len = 0,row,col,count; 1656 PetscInt *lrows,*owners = A->rmap->range; 1657 PetscSFNode *rrows; 1658 PetscSF sf; 1659 const PetscScalar *xx; 1660 PetscScalar *bb,*mask; 1661 Vec xmask,lmask; 1662 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1663 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1664 PetscScalar *aa; 1665 1666 PetscFunctionBegin; 1667 /* Create SF where leaves are input rows and roots are owned rows */ 1668 PetscCall(PetscMalloc1(n, &lrows)); 1669 for (r = 0; r < n; ++r) lrows[r] = -1; 1670 PetscCall(PetscMalloc1(N, &rrows)); 1671 for (r = 0; r < N; ++r) { 1672 const PetscInt idx = rows[r]; 1673 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1674 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1675 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1676 } 1677 rrows[r].rank = p; 1678 rrows[r].index = rows[r] - owners[p]; 1679 } 1680 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1681 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1682 /* Collect flags for rows to be zeroed */ 1683 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1684 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1685 PetscCall(PetscSFDestroy(&sf)); 1686 /* Compress and put in row numbers */ 1687 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1688 /* zero diagonal part of matrix */ 1689 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1690 /* handle off diagonal part of matrix */ 1691 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1692 PetscCall(VecDuplicate(l->lvec,&lmask)); 1693 PetscCall(VecGetArray(xmask,&bb)); 1694 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1695 PetscCall(VecRestoreArray(xmask,&bb)); 1696 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1697 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1698 PetscCall(VecDestroy(&xmask)); 1699 if (x) { 1700 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1701 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1702 PetscCall(VecGetArrayRead(l->lvec,&xx)); 1703 PetscCall(VecGetArray(b,&bb)); 1704 } 1705 PetscCall(VecGetArray(lmask,&mask)); 1706 /* remove zeroed rows of off diagonal matrix */ 1707 for (i = 0; i < len; ++i) { 1708 row = lrows[i]; 1709 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1710 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1711 for (k = 0; k < count; ++k) { 1712 aa[0] = 0.0; 1713 aa += bs; 1714 } 1715 } 1716 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1717 for (i = 0; i < l->B->rmap->N; ++i) { 1718 row = i/bs; 1719 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1720 for (k = 0; k < bs; ++k) { 1721 col = bs*baij->j[j] + k; 1722 if (PetscAbsScalar(mask[col])) { 1723 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1724 if (x) bb[i] -= aa[0]*xx[col]; 1725 aa[0] = 0.0; 1726 } 1727 } 1728 } 1729 } 1730 if (x) { 1731 PetscCall(VecRestoreArray(b,&bb)); 1732 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 1733 } 1734 PetscCall(VecRestoreArray(lmask,&mask)); 1735 PetscCall(VecDestroy(&lmask)); 1736 PetscCall(PetscFree(lrows)); 1737 1738 /* only change matrix nonzero state if pattern was allowed to be changed */ 1739 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1740 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1741 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1747 { 1748 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1749 1750 PetscFunctionBegin; 1751 PetscCall(MatSetUnfactored(a->A)); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1756 1757 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1758 { 1759 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1760 Mat a,b,c,d; 1761 PetscBool flg; 1762 1763 PetscFunctionBegin; 1764 a = matA->A; b = matA->B; 1765 c = matB->A; d = matB->B; 1766 1767 PetscCall(MatEqual(a,c,&flg)); 1768 if (flg) { 1769 PetscCall(MatEqual(b,d,&flg)); 1770 } 1771 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1772 PetscFunctionReturn(0); 1773 } 1774 1775 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1776 { 1777 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1778 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1779 1780 PetscFunctionBegin; 1781 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1782 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1783 PetscCall(MatCopy_Basic(A,B,str)); 1784 } else { 1785 PetscCall(MatCopy(a->A,b->A,str)); 1786 PetscCall(MatCopy(a->B,b->B,str)); 1787 } 1788 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1793 { 1794 PetscFunctionBegin; 1795 PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1800 { 1801 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1802 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1803 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1804 1805 PetscFunctionBegin; 1806 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1811 { 1812 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1813 PetscBLASInt bnz,one=1; 1814 Mat_SeqBAIJ *x,*y; 1815 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1816 1817 PetscFunctionBegin; 1818 if (str == SAME_NONZERO_PATTERN) { 1819 PetscScalar alpha = a; 1820 x = (Mat_SeqBAIJ*)xx->A->data; 1821 y = (Mat_SeqBAIJ*)yy->A->data; 1822 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1823 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1824 x = (Mat_SeqBAIJ*)xx->B->data; 1825 y = (Mat_SeqBAIJ*)yy->B->data; 1826 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1827 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1828 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1829 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1830 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1831 } else { 1832 Mat B; 1833 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1834 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1835 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1836 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1837 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1838 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1839 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1840 PetscCall(MatSetType(B,MATMPIBAIJ)); 1841 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d)); 1842 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1843 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1844 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1845 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1846 PetscCall(MatHeaderMerge(Y,&B)); 1847 PetscCall(PetscFree(nnz_d)); 1848 PetscCall(PetscFree(nnz_o)); 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1854 { 1855 PetscFunctionBegin; 1856 if (PetscDefined(USE_COMPLEX)) { 1857 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1858 1859 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1860 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1866 { 1867 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1868 1869 PetscFunctionBegin; 1870 PetscCall(MatRealPart(a->A)); 1871 PetscCall(MatRealPart(a->B)); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1876 { 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1878 1879 PetscFunctionBegin; 1880 PetscCall(MatImaginaryPart(a->A)); 1881 PetscCall(MatImaginaryPart(a->B)); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1886 { 1887 IS iscol_local; 1888 PetscInt csize; 1889 1890 PetscFunctionBegin; 1891 PetscCall(ISGetLocalSize(iscol,&csize)); 1892 if (call == MAT_REUSE_MATRIX) { 1893 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1894 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1895 } else { 1896 PetscCall(ISAllGather(iscol,&iscol_local)); 1897 } 1898 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1899 if (call == MAT_INITIAL_MATRIX) { 1900 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1901 PetscCall(ISDestroy(&iscol_local)); 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 /* 1907 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1908 in local and then by concatenating the local matrices the end result. 1909 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1910 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1911 */ 1912 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1913 { 1914 PetscMPIInt rank,size; 1915 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1916 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1917 Mat M,Mreuse; 1918 MatScalar *vwork,*aa; 1919 MPI_Comm comm; 1920 IS isrow_new, iscol_new; 1921 Mat_SeqBAIJ *aij; 1922 1923 PetscFunctionBegin; 1924 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 1925 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1926 PetscCallMPI(MPI_Comm_size(comm,&size)); 1927 /* The compression and expansion should be avoided. Doesn't point 1928 out errors, might change the indices, hence buggey */ 1929 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1930 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1931 1932 if (call == MAT_REUSE_MATRIX) { 1933 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1934 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1935 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1936 } else { 1937 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1938 } 1939 PetscCall(ISDestroy(&isrow_new)); 1940 PetscCall(ISDestroy(&iscol_new)); 1941 /* 1942 m - number of local rows 1943 n - number of columns (same on all processors) 1944 rstart - first row in new global matrix generated 1945 */ 1946 PetscCall(MatGetBlockSize(mat,&bs)); 1947 PetscCall(MatGetSize(Mreuse,&m,&n)); 1948 m = m/bs; 1949 n = n/bs; 1950 1951 if (call == MAT_INITIAL_MATRIX) { 1952 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1953 ii = aij->i; 1954 jj = aij->j; 1955 1956 /* 1957 Determine the number of non-zeros in the diagonal and off-diagonal 1958 portions of the matrix in order to do correct preallocation 1959 */ 1960 1961 /* first get start and end of "diagonal" columns */ 1962 if (csize == PETSC_DECIDE) { 1963 PetscCall(ISGetSize(isrow,&mglobal)); 1964 if (mglobal == n*bs) { /* square matrix */ 1965 nlocal = m; 1966 } else { 1967 nlocal = n/size + ((n % size) > rank); 1968 } 1969 } else { 1970 nlocal = csize/bs; 1971 } 1972 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1973 rstart = rend - nlocal; 1974 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1975 1976 /* next, compute all the lengths */ 1977 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1978 for (i=0; i<m; i++) { 1979 jend = ii[i+1] - ii[i]; 1980 olen = 0; 1981 dlen = 0; 1982 for (j=0; j<jend; j++) { 1983 if (*jj < rstart || *jj >= rend) olen++; 1984 else dlen++; 1985 jj++; 1986 } 1987 olens[i] = olen; 1988 dlens[i] = dlen; 1989 } 1990 PetscCall(MatCreate(comm,&M)); 1991 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1992 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1993 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1994 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1995 PetscCall(PetscFree2(dlens,olens)); 1996 } else { 1997 PetscInt ml,nl; 1998 1999 M = *newmat; 2000 PetscCall(MatGetLocalSize(M,&ml,&nl)); 2001 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2002 PetscCall(MatZeroEntries(M)); 2003 /* 2004 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2005 rather than the slower MatSetValues(). 2006 */ 2007 M->was_assembled = PETSC_TRUE; 2008 M->assembled = PETSC_FALSE; 2009 } 2010 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2011 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2012 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2013 ii = aij->i; 2014 jj = aij->j; 2015 aa = aij->a; 2016 for (i=0; i<m; i++) { 2017 row = rstart/bs + i; 2018 nz = ii[i+1] - ii[i]; 2019 cwork = jj; jj += nz; 2020 vwork = aa; aa += nz*bs*bs; 2021 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2022 } 2023 2024 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2025 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2026 *newmat = M; 2027 2028 /* save submatrix used in processor for next request */ 2029 if (call == MAT_INITIAL_MATRIX) { 2030 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2031 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2032 } 2033 PetscFunctionReturn(0); 2034 } 2035 2036 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2037 { 2038 MPI_Comm comm,pcomm; 2039 PetscInt clocal_size,nrows; 2040 const PetscInt *rows; 2041 PetscMPIInt size; 2042 IS crowp,lcolp; 2043 2044 PetscFunctionBegin; 2045 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2046 /* make a collective version of 'rowp' */ 2047 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2048 if (pcomm==comm) { 2049 crowp = rowp; 2050 } else { 2051 PetscCall(ISGetSize(rowp,&nrows)); 2052 PetscCall(ISGetIndices(rowp,&rows)); 2053 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2054 PetscCall(ISRestoreIndices(rowp,&rows)); 2055 } 2056 PetscCall(ISSetPermutation(crowp)); 2057 /* make a local version of 'colp' */ 2058 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2059 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2060 if (size==1) { 2061 lcolp = colp; 2062 } else { 2063 PetscCall(ISAllGather(colp,&lcolp)); 2064 } 2065 PetscCall(ISSetPermutation(lcolp)); 2066 /* now we just get the submatrix */ 2067 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2068 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2069 /* clean up */ 2070 if (pcomm!=comm) { 2071 PetscCall(ISDestroy(&crowp)); 2072 } 2073 if (size>1) { 2074 PetscCall(ISDestroy(&lcolp)); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2080 { 2081 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2082 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2083 2084 PetscFunctionBegin; 2085 if (nghosts) *nghosts = B->nbs; 2086 if (ghosts) *ghosts = baij->garray; 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2091 { 2092 Mat B; 2093 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2094 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2095 Mat_SeqAIJ *b; 2096 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2097 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2098 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2099 2100 PetscFunctionBegin; 2101 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2102 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2103 2104 /* ---------------------------------------------------------------- 2105 Tell every processor the number of nonzeros per row 2106 */ 2107 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2108 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2109 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2110 } 2111 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2112 displs = recvcounts + size; 2113 for (i=0; i<size; i++) { 2114 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2115 displs[i] = A->rmap->range[i]/bs; 2116 } 2117 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2118 /* --------------------------------------------------------------- 2119 Create the sequential matrix of the same type as the local block diagonal 2120 */ 2121 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2122 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2123 PetscCall(MatSetType(B,MATSEQAIJ)); 2124 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2125 b = (Mat_SeqAIJ*)B->data; 2126 2127 /*-------------------------------------------------------------------- 2128 Copy my part of matrix column indices over 2129 */ 2130 sendcount = ad->nz + bd->nz; 2131 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2132 a_jsendbuf = ad->j; 2133 b_jsendbuf = bd->j; 2134 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2135 cnt = 0; 2136 for (i=0; i<n; i++) { 2137 2138 /* put in lower diagonal portion */ 2139 m = bd->i[i+1] - bd->i[i]; 2140 while (m > 0) { 2141 /* is it above diagonal (in bd (compressed) numbering) */ 2142 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2143 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2144 m--; 2145 } 2146 2147 /* put in diagonal portion */ 2148 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2149 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2150 } 2151 2152 /* put in upper diagonal portion */ 2153 while (m-- > 0) { 2154 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2155 } 2156 } 2157 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2158 2159 /*-------------------------------------------------------------------- 2160 Gather all column indices to all processors 2161 */ 2162 for (i=0; i<size; i++) { 2163 recvcounts[i] = 0; 2164 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2165 recvcounts[i] += lens[j]; 2166 } 2167 } 2168 displs[0] = 0; 2169 for (i=1; i<size; i++) { 2170 displs[i] = displs[i-1] + recvcounts[i-1]; 2171 } 2172 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2173 /*-------------------------------------------------------------------- 2174 Assemble the matrix into useable form (note numerical values not yet set) 2175 */ 2176 /* set the b->ilen (length of each row) values */ 2177 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2178 /* set the b->i indices */ 2179 b->i[0] = 0; 2180 for (i=1; i<=A->rmap->N/bs; i++) { 2181 b->i[i] = b->i[i-1] + lens[i-1]; 2182 } 2183 PetscCall(PetscFree(lens)); 2184 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2185 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2186 PetscCall(PetscFree(recvcounts)); 2187 2188 PetscCall(MatPropagateSymmetryOptions(A,B)); 2189 *newmat = B; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2194 { 2195 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2196 Vec bb1 = NULL; 2197 2198 PetscFunctionBegin; 2199 if (flag == SOR_APPLY_UPPER) { 2200 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2205 PetscCall(VecDuplicate(bb,&bb1)); 2206 } 2207 2208 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2209 if (flag & SOR_ZERO_INITIAL_GUESS) { 2210 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2211 its--; 2212 } 2213 2214 while (its--) { 2215 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2216 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2217 2218 /* update rhs: bb1 = bb - B*x */ 2219 PetscCall(VecScale(mat->lvec,-1.0)); 2220 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2221 2222 /* local sweep */ 2223 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2224 } 2225 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2226 if (flag & SOR_ZERO_INITIAL_GUESS) { 2227 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2228 its--; 2229 } 2230 while (its--) { 2231 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2232 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2233 2234 /* update rhs: bb1 = bb - B*x */ 2235 PetscCall(VecScale(mat->lvec,-1.0)); 2236 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2237 2238 /* local sweep */ 2239 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2240 } 2241 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2242 if (flag & SOR_ZERO_INITIAL_GUESS) { 2243 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2244 its--; 2245 } 2246 while (its--) { 2247 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2248 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2249 2250 /* update rhs: bb1 = bb - B*x */ 2251 PetscCall(VecScale(mat->lvec,-1.0)); 2252 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2253 2254 /* local sweep */ 2255 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2256 } 2257 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2258 2259 PetscCall(VecDestroy(&bb1)); 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2264 { 2265 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2266 PetscInt m,N,i,*garray = aij->garray; 2267 PetscInt ib,jb,bs = A->rmap->bs; 2268 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2269 MatScalar *a_val = a_aij->a; 2270 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2271 MatScalar *b_val = b_aij->a; 2272 PetscReal *work; 2273 2274 PetscFunctionBegin; 2275 PetscCall(MatGetSize(A,&m,&N)); 2276 PetscCall(PetscCalloc1(N,&work)); 2277 if (type == NORM_2) { 2278 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2279 for (jb=0; jb<bs; jb++) { 2280 for (ib=0; ib<bs; ib++) { 2281 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2282 a_val++; 2283 } 2284 } 2285 } 2286 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2287 for (jb=0; jb<bs; jb++) { 2288 for (ib=0; ib<bs; ib++) { 2289 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2290 b_val++; 2291 } 2292 } 2293 } 2294 } else if (type == NORM_1) { 2295 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2296 for (jb=0; jb<bs; jb++) { 2297 for (ib=0; ib<bs; ib++) { 2298 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2299 a_val++; 2300 } 2301 } 2302 } 2303 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2304 for (jb=0; jb<bs; jb++) { 2305 for (ib=0; ib<bs; ib++) { 2306 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2307 b_val++; 2308 } 2309 } 2310 } 2311 } else if (type == NORM_INFINITY) { 2312 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2313 for (jb=0; jb<bs; jb++) { 2314 for (ib=0; ib<bs; ib++) { 2315 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2316 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2322 for (jb=0; jb<bs; jb++) { 2323 for (ib=0; ib<bs; ib++) { 2324 int col = garray[b_aij->j[i]] * bs + jb; 2325 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2326 b_val++; 2327 } 2328 } 2329 } 2330 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2331 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2332 for (jb=0; jb<bs; jb++) { 2333 for (ib=0; ib<bs; ib++) { 2334 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2335 a_val++; 2336 } 2337 } 2338 } 2339 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2340 for (jb=0; jb<bs; jb++) { 2341 for (ib=0; ib<bs; ib++) { 2342 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2343 b_val++; 2344 } 2345 } 2346 } 2347 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2348 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2349 for (jb=0; jb<bs; jb++) { 2350 for (ib=0; ib<bs; ib++) { 2351 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2352 a_val++; 2353 } 2354 } 2355 } 2356 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2357 for (jb=0; jb<bs; jb++) { 2358 for (ib=0; ib<bs; ib++) { 2359 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2360 b_val++; 2361 } 2362 } 2363 } 2364 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2365 if (type == NORM_INFINITY) { 2366 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2367 } else { 2368 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2369 } 2370 PetscCall(PetscFree(work)); 2371 if (type == NORM_2) { 2372 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2373 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2374 for (i=0; i<N; i++) reductions[i] /= m; 2375 } 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2380 { 2381 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2382 2383 PetscFunctionBegin; 2384 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2385 A->factorerrortype = a->A->factorerrortype; 2386 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2387 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2392 { 2393 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2394 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2395 2396 PetscFunctionBegin; 2397 if (!Y->preallocated) { 2398 PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2399 } else if (!aij->nz) { 2400 PetscInt nonew = aij->nonew; 2401 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2402 aij->nonew = nonew; 2403 } 2404 PetscCall(MatShift_Basic(Y,a)); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2409 { 2410 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2411 2412 PetscFunctionBegin; 2413 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2414 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2415 if (d) { 2416 PetscInt rstart; 2417 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2418 *d += rstart/A->rmap->bs; 2419 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2425 { 2426 PetscFunctionBegin; 2427 *a = ((Mat_MPIBAIJ*)A->data)->A; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 /* -------------------------------------------------------------------*/ 2432 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2433 MatGetRow_MPIBAIJ, 2434 MatRestoreRow_MPIBAIJ, 2435 MatMult_MPIBAIJ, 2436 /* 4*/ MatMultAdd_MPIBAIJ, 2437 MatMultTranspose_MPIBAIJ, 2438 MatMultTransposeAdd_MPIBAIJ, 2439 NULL, 2440 NULL, 2441 NULL, 2442 /*10*/ NULL, 2443 NULL, 2444 NULL, 2445 MatSOR_MPIBAIJ, 2446 MatTranspose_MPIBAIJ, 2447 /*15*/ MatGetInfo_MPIBAIJ, 2448 MatEqual_MPIBAIJ, 2449 MatGetDiagonal_MPIBAIJ, 2450 MatDiagonalScale_MPIBAIJ, 2451 MatNorm_MPIBAIJ, 2452 /*20*/ MatAssemblyBegin_MPIBAIJ, 2453 MatAssemblyEnd_MPIBAIJ, 2454 MatSetOption_MPIBAIJ, 2455 MatZeroEntries_MPIBAIJ, 2456 /*24*/ MatZeroRows_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 NULL, 2460 NULL, 2461 /*29*/ MatSetUp_MPIBAIJ, 2462 NULL, 2463 NULL, 2464 MatGetDiagonalBlock_MPIBAIJ, 2465 NULL, 2466 /*34*/ MatDuplicate_MPIBAIJ, 2467 NULL, 2468 NULL, 2469 NULL, 2470 NULL, 2471 /*39*/ MatAXPY_MPIBAIJ, 2472 MatCreateSubMatrices_MPIBAIJ, 2473 MatIncreaseOverlap_MPIBAIJ, 2474 MatGetValues_MPIBAIJ, 2475 MatCopy_MPIBAIJ, 2476 /*44*/ NULL, 2477 MatScale_MPIBAIJ, 2478 MatShift_MPIBAIJ, 2479 NULL, 2480 MatZeroRowsColumns_MPIBAIJ, 2481 /*49*/ NULL, 2482 NULL, 2483 NULL, 2484 NULL, 2485 NULL, 2486 /*54*/ MatFDColoringCreate_MPIXAIJ, 2487 NULL, 2488 MatSetUnfactored_MPIBAIJ, 2489 MatPermute_MPIBAIJ, 2490 MatSetValuesBlocked_MPIBAIJ, 2491 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2492 MatDestroy_MPIBAIJ, 2493 MatView_MPIBAIJ, 2494 NULL, 2495 NULL, 2496 /*64*/ NULL, 2497 NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2502 NULL, 2503 NULL, 2504 NULL, 2505 NULL, 2506 /*74*/ NULL, 2507 MatFDColoringApply_BAIJ, 2508 NULL, 2509 NULL, 2510 NULL, 2511 /*79*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 MatLoad_MPIBAIJ, 2516 /*84*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*89*/ NULL, 2522 NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 /*94*/ NULL, 2527 NULL, 2528 NULL, 2529 NULL, 2530 NULL, 2531 /*99*/ NULL, 2532 NULL, 2533 NULL, 2534 MatConjugate_MPIBAIJ, 2535 NULL, 2536 /*104*/NULL, 2537 MatRealPart_MPIBAIJ, 2538 MatImaginaryPart_MPIBAIJ, 2539 NULL, 2540 NULL, 2541 /*109*/NULL, 2542 NULL, 2543 NULL, 2544 NULL, 2545 MatMissingDiagonal_MPIBAIJ, 2546 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2547 NULL, 2548 MatGetGhosts_MPIBAIJ, 2549 NULL, 2550 NULL, 2551 /*119*/NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 MatGetMultiProcBlock_MPIBAIJ, 2556 /*124*/NULL, 2557 MatGetColumnReductions_MPIBAIJ, 2558 MatInvertBlockDiagonal_MPIBAIJ, 2559 NULL, 2560 NULL, 2561 /*129*/ NULL, 2562 NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 /*134*/ NULL, 2567 NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 /*139*/ MatSetBlockSizes_Default, 2572 NULL, 2573 NULL, 2574 MatFDColoringSetUp_MPIXAIJ, 2575 NULL, 2576 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2577 NULL, 2578 NULL, 2579 NULL, 2580 NULL, 2581 NULL 2582 }; 2583 2584 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2585 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2586 2587 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2588 { 2589 PetscInt m,rstart,cstart,cend; 2590 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2591 const PetscInt *JJ =NULL; 2592 PetscScalar *values=NULL; 2593 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2594 PetscBool nooffprocentries; 2595 2596 PetscFunctionBegin; 2597 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2598 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2599 PetscCall(PetscLayoutSetUp(B->rmap)); 2600 PetscCall(PetscLayoutSetUp(B->cmap)); 2601 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2602 m = B->rmap->n/bs; 2603 rstart = B->rmap->rstart/bs; 2604 cstart = B->cmap->rstart/bs; 2605 cend = B->cmap->rend/bs; 2606 2607 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2608 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2609 for (i=0; i<m; i++) { 2610 nz = ii[i+1] - ii[i]; 2611 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2612 nz_max = PetscMax(nz_max,nz); 2613 dlen = 0; 2614 olen = 0; 2615 JJ = jj + ii[i]; 2616 for (j=0; j<nz; j++) { 2617 if (*JJ < cstart || *JJ >= cend) olen++; 2618 else dlen++; 2619 JJ++; 2620 } 2621 d_nnz[i] = dlen; 2622 o_nnz[i] = olen; 2623 } 2624 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2625 PetscCall(PetscFree2(d_nnz,o_nnz)); 2626 2627 values = (PetscScalar*)V; 2628 if (!values) { 2629 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2630 } 2631 for (i=0; i<m; i++) { 2632 PetscInt row = i + rstart; 2633 PetscInt ncols = ii[i+1] - ii[i]; 2634 const PetscInt *icols = jj + ii[i]; 2635 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2636 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2638 } else { /* block ordering does not match so we can only insert one block at a time. */ 2639 PetscInt j; 2640 for (j=0; j<ncols; j++) { 2641 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2642 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2643 } 2644 } 2645 } 2646 2647 if (!V) PetscCall(PetscFree(values)); 2648 nooffprocentries = B->nooffprocentries; 2649 B->nooffprocentries = PETSC_TRUE; 2650 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2651 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2652 B->nooffprocentries = nooffprocentries; 2653 2654 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /*@C 2659 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2660 2661 Collective 2662 2663 Input Parameters: 2664 + B - the matrix 2665 . bs - the block size 2666 . i - the indices into j for the start of each local row (starts with zero) 2667 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2668 - v - optional values in the matrix 2669 2670 Level: advanced 2671 2672 Notes: 2673 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2674 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2675 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2676 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2677 block column and the second index is over columns within a block. 2678 2679 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2680 2681 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2682 @*/ 2683 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2684 { 2685 PetscFunctionBegin; 2686 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2687 PetscValidType(B,1); 2688 PetscValidLogicalCollectiveInt(B,bs,2); 2689 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2694 { 2695 Mat_MPIBAIJ *b; 2696 PetscInt i; 2697 PetscMPIInt size; 2698 2699 PetscFunctionBegin; 2700 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2701 PetscCall(PetscLayoutSetUp(B->rmap)); 2702 PetscCall(PetscLayoutSetUp(B->cmap)); 2703 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2704 2705 if (d_nnz) { 2706 for (i=0; i<B->rmap->n/bs; i++) { 2707 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2708 } 2709 } 2710 if (o_nnz) { 2711 for (i=0; i<B->rmap->n/bs; i++) { 2712 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2713 } 2714 } 2715 2716 b = (Mat_MPIBAIJ*)B->data; 2717 b->bs2 = bs*bs; 2718 b->mbs = B->rmap->n/bs; 2719 b->nbs = B->cmap->n/bs; 2720 b->Mbs = B->rmap->N/bs; 2721 b->Nbs = B->cmap->N/bs; 2722 2723 for (i=0; i<=b->size; i++) { 2724 b->rangebs[i] = B->rmap->range[i]/bs; 2725 } 2726 b->rstartbs = B->rmap->rstart/bs; 2727 b->rendbs = B->rmap->rend/bs; 2728 b->cstartbs = B->cmap->rstart/bs; 2729 b->cendbs = B->cmap->rend/bs; 2730 2731 #if defined(PETSC_USE_CTABLE) 2732 PetscCall(PetscTableDestroy(&b->colmap)); 2733 #else 2734 PetscCall(PetscFree(b->colmap)); 2735 #endif 2736 PetscCall(PetscFree(b->garray)); 2737 PetscCall(VecDestroy(&b->lvec)); 2738 PetscCall(VecScatterDestroy(&b->Mvctx)); 2739 2740 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2741 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2742 PetscCall(MatDestroy(&b->B)); 2743 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2744 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2745 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2746 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2747 2748 if (!B->preallocated) { 2749 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2750 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2751 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2752 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2753 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2754 } 2755 2756 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2757 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2758 B->preallocated = PETSC_TRUE; 2759 B->was_assembled = PETSC_FALSE; 2760 B->assembled = PETSC_FALSE; 2761 PetscFunctionReturn(0); 2762 } 2763 2764 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2765 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2766 2767 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2768 { 2769 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2770 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2771 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2772 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2773 2774 PetscFunctionBegin; 2775 PetscCall(PetscMalloc1(M+1,&ii)); 2776 ii[0] = 0; 2777 for (i=0; i<M; i++) { 2778 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2779 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2780 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2781 /* remove one from count of matrix has diagonal */ 2782 for (j=id[i]; j<id[i+1]; j++) { 2783 if (jd[j] == i) {ii[i+1]--;break;} 2784 } 2785 } 2786 PetscCall(PetscMalloc1(ii[M],&jj)); 2787 cnt = 0; 2788 for (i=0; i<M; i++) { 2789 for (j=io[i]; j<io[i+1]; j++) { 2790 if (garray[jo[j]] > rstart) break; 2791 jj[cnt++] = garray[jo[j]]; 2792 } 2793 for (k=id[i]; k<id[i+1]; k++) { 2794 if (jd[k] != i) { 2795 jj[cnt++] = rstart + jd[k]; 2796 } 2797 } 2798 for (; j<io[i+1]; j++) { 2799 jj[cnt++] = garray[jo[j]]; 2800 } 2801 } 2802 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2807 2808 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2809 2810 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2811 { 2812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2813 Mat_MPIAIJ *b; 2814 Mat B; 2815 2816 PetscFunctionBegin; 2817 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2818 2819 if (reuse == MAT_REUSE_MATRIX) { 2820 B = *newmat; 2821 } else { 2822 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2823 PetscCall(MatSetType(B,MATMPIAIJ)); 2824 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2825 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2826 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2827 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2828 } 2829 b = (Mat_MPIAIJ*) B->data; 2830 2831 if (reuse == MAT_REUSE_MATRIX) { 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2834 } else { 2835 PetscCall(MatDestroy(&b->A)); 2836 PetscCall(MatDestroy(&b->B)); 2837 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2838 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2839 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2840 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2841 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2842 } 2843 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2844 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2845 2846 if (reuse == MAT_INPLACE_MATRIX) { 2847 PetscCall(MatHeaderReplace(A,&B)); 2848 } else { 2849 *newmat = B; 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853 2854 /*MC 2855 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2856 2857 Options Database Keys: 2858 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2859 . -mat_block_size <bs> - set the blocksize used to store the matrix 2860 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2861 - -mat_use_hash_table <fact> - set hash table factor 2862 2863 Level: beginner 2864 2865 Notes: 2866 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 2867 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2868 2869 .seealso: `MatCreateBAIJ` 2870 M*/ 2871 2872 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2873 2874 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2875 { 2876 Mat_MPIBAIJ *b; 2877 PetscBool flg = PETSC_FALSE; 2878 2879 PetscFunctionBegin; 2880 PetscCall(PetscNewLog(B,&b)); 2881 B->data = (void*)b; 2882 2883 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2884 B->assembled = PETSC_FALSE; 2885 2886 B->insertmode = NOT_SET_VALUES; 2887 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2888 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2889 2890 /* build local table of row and column ownerships */ 2891 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2892 2893 /* build cache for off array entries formed */ 2894 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2895 2896 b->donotstash = PETSC_FALSE; 2897 b->colmap = NULL; 2898 b->garray = NULL; 2899 b->roworiented = PETSC_TRUE; 2900 2901 /* stuff used in block assembly */ 2902 b->barray = NULL; 2903 2904 /* stuff used for matrix vector multiply */ 2905 b->lvec = NULL; 2906 b->Mvctx = NULL; 2907 2908 /* stuff for MatGetRow() */ 2909 b->rowindices = NULL; 2910 b->rowvalues = NULL; 2911 b->getrowactive = PETSC_FALSE; 2912 2913 /* hash table stuff */ 2914 b->ht = NULL; 2915 b->hd = NULL; 2916 b->ht_size = 0; 2917 b->ht_flag = PETSC_FALSE; 2918 b->ht_fact = 0; 2919 b->ht_total_ct = 0; 2920 b->ht_insert_ct = 0; 2921 2922 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2923 b->ijonly = PETSC_FALSE; 2924 2925 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2928 #if defined(PETSC_HAVE_HYPRE) 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2930 #endif 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2938 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2939 2940 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2941 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2942 if (flg) { 2943 PetscReal fact = 1.39; 2944 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2945 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2946 if (fact <= 1.0) fact = 1.39; 2947 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2948 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2949 } 2950 PetscOptionsEnd(); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 /*MC 2955 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2956 2957 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2958 and MATMPIBAIJ otherwise. 2959 2960 Options Database Keys: 2961 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2962 2963 Level: beginner 2964 2965 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2966 M*/ 2967 2968 /*@C 2969 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2970 (block compressed row). For good matrix assembly performance 2971 the user should preallocate the matrix storage by setting the parameters 2972 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2973 performance can be increased by more than a factor of 50. 2974 2975 Collective on Mat 2976 2977 Input Parameters: 2978 + B - the matrix 2979 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2980 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2981 . d_nz - number of block nonzeros per block row in diagonal portion of local 2982 submatrix (same for all local rows) 2983 . d_nnz - array containing the number of block nonzeros in the various block rows 2984 of the in diagonal portion of the local (possibly different for each block 2985 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2986 set it even if it is zero. 2987 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2988 submatrix (same for all local rows). 2989 - o_nnz - array containing the number of nonzeros in the various block rows of the 2990 off-diagonal portion of the local submatrix (possibly different for 2991 each block row) or NULL. 2992 2993 If the *_nnz parameter is given then the *_nz parameter is ignored 2994 2995 Options Database Keys: 2996 + -mat_block_size - size of the blocks to use 2997 - -mat_use_hash_table <fact> - set hash table factor 2998 2999 Notes: 3000 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3001 than it must be used on all processors that share the object for that argument. 3002 3003 Storage Information: 3004 For a square global matrix we define each processor's diagonal portion 3005 to be its local rows and the corresponding columns (a square submatrix); 3006 each processor's off-diagonal portion encompasses the remainder of the 3007 local matrix (a rectangular submatrix). 3008 3009 The user can specify preallocated storage for the diagonal part of 3010 the local submatrix with either d_nz or d_nnz (not both). Set 3011 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3012 memory allocation. Likewise, specify preallocated storage for the 3013 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3014 3015 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3016 the figure below we depict these three local rows and all columns (0-11). 3017 3018 .vb 3019 0 1 2 3 4 5 6 7 8 9 10 11 3020 -------------------------- 3021 row 3 |o o o d d d o o o o o o 3022 row 4 |o o o d d d o o o o o o 3023 row 5 |o o o d d d o o o o o o 3024 -------------------------- 3025 .ve 3026 3027 Thus, any entries in the d locations are stored in the d (diagonal) 3028 submatrix, and any entries in the o locations are stored in the 3029 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3030 stored simply in the MATSEQBAIJ format for compressed row storage. 3031 3032 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3033 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3034 In general, for PDE problems in which most nonzeros are near the diagonal, 3035 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3036 or you will get TERRIBLE performance; see the users' manual chapter on 3037 matrices. 3038 3039 You can call MatGetInfo() to get information on how effective the preallocation was; 3040 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3041 You can also run with the option -info and look for messages with the string 3042 malloc in them to see if additional memory allocation was needed. 3043 3044 Level: intermediate 3045 3046 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3047 @*/ 3048 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3049 { 3050 PetscFunctionBegin; 3051 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3052 PetscValidType(B,1); 3053 PetscValidLogicalCollectiveInt(B,bs,2); 3054 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 /*@C 3059 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3060 (block compressed row). For good matrix assembly performance 3061 the user should preallocate the matrix storage by setting the parameters 3062 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3063 performance can be increased by more than a factor of 50. 3064 3065 Collective 3066 3067 Input Parameters: 3068 + comm - MPI communicator 3069 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3070 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3071 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3072 This value should be the same as the local size used in creating the 3073 y vector for the matrix-vector product y = Ax. 3074 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3075 This value should be the same as the local size used in creating the 3076 x vector for the matrix-vector product y = Ax. 3077 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3078 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3079 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3080 submatrix (same for all local rows) 3081 . d_nnz - array containing the number of nonzero blocks in the various block rows 3082 of the in diagonal portion of the local (possibly different for each block 3083 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3084 and set it even if it is zero. 3085 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3086 submatrix (same for all local rows). 3087 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3088 off-diagonal portion of the local submatrix (possibly different for 3089 each block row) or NULL. 3090 3091 Output Parameter: 3092 . A - the matrix 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> - set hash table factor 3097 3098 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3099 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3100 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3101 3102 Notes: 3103 If the *_nnz parameter is given then the *_nz parameter is ignored 3104 3105 A nonzero block is any block that as 1 or more nonzeros in it 3106 3107 The user MUST specify either the local or global matrix dimensions 3108 (possibly both). 3109 3110 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3111 than it must be used on all processors that share the object for that argument. 3112 3113 Storage Information: 3114 For a square global matrix we define each processor's diagonal portion 3115 to be its local rows and the corresponding columns (a square submatrix); 3116 each processor's off-diagonal portion encompasses the remainder of the 3117 local matrix (a rectangular submatrix). 3118 3119 The user can specify preallocated storage for the diagonal part of 3120 the local submatrix with either d_nz or d_nnz (not both). Set 3121 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3122 memory allocation. Likewise, specify preallocated storage for the 3123 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3124 3125 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3126 the figure below we depict these three local rows and all columns (0-11). 3127 3128 .vb 3129 0 1 2 3 4 5 6 7 8 9 10 11 3130 -------------------------- 3131 row 3 |o o o d d d o o o o o o 3132 row 4 |o o o d d d o o o o o o 3133 row 5 |o o o d d d o o o o o o 3134 -------------------------- 3135 .ve 3136 3137 Thus, any entries in the d locations are stored in the d (diagonal) 3138 submatrix, and any entries in the o locations are stored in the 3139 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3140 stored simply in the MATSEQBAIJ format for compressed row storage. 3141 3142 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3143 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3144 In general, for PDE problems in which most nonzeros are near the diagonal, 3145 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3146 or you will get TERRIBLE performance; see the users' manual chapter on 3147 matrices. 3148 3149 Level: intermediate 3150 3151 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3152 @*/ 3153 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3154 { 3155 PetscMPIInt size; 3156 3157 PetscFunctionBegin; 3158 PetscCall(MatCreate(comm,A)); 3159 PetscCall(MatSetSizes(*A,m,n,M,N)); 3160 PetscCallMPI(MPI_Comm_size(comm,&size)); 3161 if (size > 1) { 3162 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3163 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3164 } else { 3165 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3166 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3167 } 3168 PetscFunctionReturn(0); 3169 } 3170 3171 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3172 { 3173 Mat mat; 3174 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3175 PetscInt len=0; 3176 3177 PetscFunctionBegin; 3178 *newmat = NULL; 3179 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3180 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3181 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3182 3183 mat->factortype = matin->factortype; 3184 mat->preallocated = PETSC_TRUE; 3185 mat->assembled = PETSC_TRUE; 3186 mat->insertmode = NOT_SET_VALUES; 3187 3188 a = (Mat_MPIBAIJ*)mat->data; 3189 mat->rmap->bs = matin->rmap->bs; 3190 a->bs2 = oldmat->bs2; 3191 a->mbs = oldmat->mbs; 3192 a->nbs = oldmat->nbs; 3193 a->Mbs = oldmat->Mbs; 3194 a->Nbs = oldmat->Nbs; 3195 3196 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3197 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3198 3199 a->size = oldmat->size; 3200 a->rank = oldmat->rank; 3201 a->donotstash = oldmat->donotstash; 3202 a->roworiented = oldmat->roworiented; 3203 a->rowindices = NULL; 3204 a->rowvalues = NULL; 3205 a->getrowactive = PETSC_FALSE; 3206 a->barray = NULL; 3207 a->rstartbs = oldmat->rstartbs; 3208 a->rendbs = oldmat->rendbs; 3209 a->cstartbs = oldmat->cstartbs; 3210 a->cendbs = oldmat->cendbs; 3211 3212 /* hash table stuff */ 3213 a->ht = NULL; 3214 a->hd = NULL; 3215 a->ht_size = 0; 3216 a->ht_flag = oldmat->ht_flag; 3217 a->ht_fact = oldmat->ht_fact; 3218 a->ht_total_ct = 0; 3219 a->ht_insert_ct = 0; 3220 3221 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3222 if (oldmat->colmap) { 3223 #if defined(PETSC_USE_CTABLE) 3224 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3225 #else 3226 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3227 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3228 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3229 #endif 3230 } else a->colmap = NULL; 3231 3232 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3233 PetscCall(PetscMalloc1(len,&a->garray)); 3234 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3235 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3236 } else a->garray = NULL; 3237 3238 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3239 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3240 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3241 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3242 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3243 3244 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3245 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3246 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3247 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3248 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3249 *newmat = mat; 3250 PetscFunctionReturn(0); 3251 } 3252 3253 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3254 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3255 { 3256 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3257 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3258 PetscScalar *matvals; 3259 3260 PetscFunctionBegin; 3261 PetscCall(PetscViewerSetUp(viewer)); 3262 3263 /* read in matrix header */ 3264 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3265 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3266 M = header[1]; N = header[2]; nz = header[3]; 3267 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3268 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3269 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3270 3271 /* set block sizes from the viewer's .info file */ 3272 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3273 /* set local sizes if not set already */ 3274 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3275 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3276 /* set global sizes if not set already */ 3277 if (mat->rmap->N < 0) mat->rmap->N = M; 3278 if (mat->cmap->N < 0) mat->cmap->N = N; 3279 PetscCall(PetscLayoutSetUp(mat->rmap)); 3280 PetscCall(PetscLayoutSetUp(mat->cmap)); 3281 3282 /* check if the matrix sizes are correct */ 3283 PetscCall(MatGetSize(mat,&rows,&cols)); 3284 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3285 PetscCall(MatGetBlockSize(mat,&bs)); 3286 PetscCall(MatGetLocalSize(mat,&m,&n)); 3287 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3288 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3289 mbs = m/bs; nbs = n/bs; 3290 3291 /* read in row lengths and build row indices */ 3292 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3293 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3294 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3295 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3296 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3297 3298 /* read in column indices and matrix values */ 3299 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3300 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3301 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3302 3303 { /* preallocate matrix storage */ 3304 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3305 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3306 PetscBool sbaij,done; 3307 PetscInt *d_nnz,*o_nnz; 3308 3309 PetscCall(PetscBTCreate(nbs,&bt)); 3310 PetscCall(PetscHSetICreate(&ht)); 3311 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3312 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3313 for (i=0; i<mbs; i++) { 3314 PetscCall(PetscBTMemzero(nbs,bt)); 3315 PetscCall(PetscHSetIClear(ht)); 3316 for (k=0; k<bs; k++) { 3317 PetscInt row = bs*i + k; 3318 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3319 PetscInt col = colidxs[j]; 3320 if (!sbaij || col >= row) { 3321 if (col >= cs && col < ce) { 3322 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3323 } else { 3324 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3325 if (done) o_nnz[i]++; 3326 } 3327 } 3328 } 3329 } 3330 } 3331 PetscCall(PetscBTDestroy(&bt)); 3332 PetscCall(PetscHSetIDestroy(&ht)); 3333 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3334 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3335 PetscCall(PetscFree2(d_nnz,o_nnz)); 3336 } 3337 3338 /* store matrix values */ 3339 for (i=0; i<m; i++) { 3340 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3341 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3342 } 3343 3344 PetscCall(PetscFree(rowidxs)); 3345 PetscCall(PetscFree2(colidxs,matvals)); 3346 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3347 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3348 PetscFunctionReturn(0); 3349 } 3350 3351 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3352 { 3353 PetscBool isbinary; 3354 3355 PetscFunctionBegin; 3356 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3357 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3358 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /*@ 3363 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3364 3365 Input Parameters: 3366 + mat - the matrix 3367 - fact - factor 3368 3369 Not Collective, each process can use a different factor 3370 3371 Level: advanced 3372 3373 Notes: 3374 This can also be set by the command line option: -mat_use_hash_table <fact> 3375 3376 .seealso: `MatSetOption()` 3377 @*/ 3378 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3379 { 3380 PetscFunctionBegin; 3381 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3382 PetscFunctionReturn(0); 3383 } 3384 3385 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3386 { 3387 Mat_MPIBAIJ *baij; 3388 3389 PetscFunctionBegin; 3390 baij = (Mat_MPIBAIJ*)mat->data; 3391 baij->ht_fact = fact; 3392 PetscFunctionReturn(0); 3393 } 3394 3395 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3396 { 3397 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3398 PetscBool flg; 3399 3400 PetscFunctionBegin; 3401 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3402 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3403 if (Ad) *Ad = a->A; 3404 if (Ao) *Ao = a->B; 3405 if (colmap) *colmap = a->garray; 3406 PetscFunctionReturn(0); 3407 } 3408 3409 /* 3410 Special version for direct calls from Fortran (to eliminate two function call overheads 3411 */ 3412 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3413 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3414 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3415 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3416 #endif 3417 3418 /*@C 3419 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3420 3421 Collective on Mat 3422 3423 Input Parameters: 3424 + mat - the matrix 3425 . min - number of input rows 3426 . im - input rows 3427 . nin - number of input columns 3428 . in - input columns 3429 . v - numerical values input 3430 - addvin - INSERT_VALUES or ADD_VALUES 3431 3432 Notes: 3433 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3434 3435 Level: advanced 3436 3437 .seealso: `MatSetValuesBlocked()` 3438 @*/ 3439 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3440 { 3441 /* convert input arguments to C version */ 3442 Mat mat = *matin; 3443 PetscInt m = *min, n = *nin; 3444 InsertMode addv = *addvin; 3445 3446 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3447 const MatScalar *value; 3448 MatScalar *barray = baij->barray; 3449 PetscBool roworiented = baij->roworiented; 3450 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3451 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3452 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3453 3454 PetscFunctionBegin; 3455 /* tasks normally handled by MatSetValuesBlocked() */ 3456 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3457 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3458 PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3459 if (mat->assembled) { 3460 mat->was_assembled = PETSC_TRUE; 3461 mat->assembled = PETSC_FALSE; 3462 } 3463 PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0)); 3464 3465 if (!barray) { 3466 PetscCall(PetscMalloc1(bs2,&barray)); 3467 baij->barray = barray; 3468 } 3469 3470 if (roworiented) stepval = (n-1)*bs; 3471 else stepval = (m-1)*bs; 3472 3473 for (i=0; i<m; i++) { 3474 if (im[i] < 0) continue; 3475 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3476 if (im[i] >= rstart && im[i] < rend) { 3477 row = im[i] - rstart; 3478 for (j=0; j<n; j++) { 3479 /* If NumCol = 1 then a copy is not required */ 3480 if ((roworiented) && (n == 1)) { 3481 barray = (MatScalar*)v + i*bs2; 3482 } else if ((!roworiented) && (m == 1)) { 3483 barray = (MatScalar*)v + j*bs2; 3484 } else { /* Here a copy is required */ 3485 if (roworiented) { 3486 value = v + i*(stepval+bs)*bs + j*bs; 3487 } else { 3488 value = v + j*(stepval+bs)*bs + i*bs; 3489 } 3490 for (ii=0; ii<bs; ii++,value+=stepval) { 3491 for (jj=0; jj<bs; jj++) { 3492 *barray++ = *value++; 3493 } 3494 } 3495 barray -=bs2; 3496 } 3497 3498 if (in[j] >= cstart && in[j] < cend) { 3499 col = in[j] - cstart; 3500 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 3501 } else if (in[j] < 0) { 3502 continue; 3503 } else { 3504 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3505 if (mat->was_assembled) { 3506 if (!baij->colmap) { 3507 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3508 } 3509 3510 #if defined(PETSC_USE_DEBUG) 3511 #if defined(PETSC_USE_CTABLE) 3512 { PetscInt data; 3513 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 3514 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3515 } 3516 #else 3517 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3518 #endif 3519 #endif 3520 #if defined(PETSC_USE_CTABLE) 3521 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 3522 col = (col - 1)/bs; 3523 #else 3524 col = (baij->colmap[in[j]] - 1)/bs; 3525 #endif 3526 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3527 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3528 col = in[j]; 3529 } 3530 } else col = in[j]; 3531 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3532 } 3533 } 3534 } else { 3535 if (!baij->donotstash) { 3536 if (roworiented) { 3537 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3538 } else { 3539 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3540 } 3541 } 3542 } 3543 } 3544 3545 /* task normally handled by MatSetValuesBlocked() */ 3546 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3547 PetscFunctionReturn(0); 3548 } 3549 3550 /*@ 3551 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3552 CSR format the local rows. 3553 3554 Collective 3555 3556 Input Parameters: 3557 + comm - MPI communicator 3558 . bs - the block size, only a block size of 1 is supported 3559 . m - number of local rows (Cannot be PETSC_DECIDE) 3560 . n - This value should be the same as the local size used in creating the 3561 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3562 calculated if N is given) For square matrices n is almost always m. 3563 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3564 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3565 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3566 . j - column indices 3567 - a - matrix values 3568 3569 Output Parameter: 3570 . mat - the matrix 3571 3572 Level: intermediate 3573 3574 Notes: 3575 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3576 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3577 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3578 3579 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3580 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3581 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3582 with column-major ordering within blocks. 3583 3584 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3585 3586 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3587 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3588 @*/ 3589 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3590 { 3591 PetscFunctionBegin; 3592 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3593 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3594 PetscCall(MatCreate(comm,mat)); 3595 PetscCall(MatSetSizes(*mat,m,n,M,N)); 3596 PetscCall(MatSetType(*mat,MATMPIBAIJ)); 3597 PetscCall(MatSetBlockSize(*mat,bs)); 3598 PetscCall(MatSetUp(*mat)); 3599 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 3600 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 3601 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 3602 PetscFunctionReturn(0); 3603 } 3604 3605 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3606 { 3607 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3608 PetscInt *indx; 3609 PetscScalar *values; 3610 3611 PetscFunctionBegin; 3612 PetscCall(MatGetSize(inmat,&m,&N)); 3613 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3615 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3616 PetscInt *bindx,rmax=a->rmax,j; 3617 PetscMPIInt rank,size; 3618 3619 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3620 mbs = m/bs; Nbs = N/cbs; 3621 if (n == PETSC_DECIDE) { 3622 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 3623 } 3624 nbs = n/cbs; 3625 3626 PetscCall(PetscMalloc1(rmax,&bindx)); 3627 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 3628 3629 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3630 PetscCallMPI(MPI_Comm_rank(comm,&size)); 3631 if (rank == size-1) { 3632 /* Check sum(nbs) = Nbs */ 3633 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3634 } 3635 3636 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3637 for (i=0; i<mbs; i++) { 3638 PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 3639 nnz = nnz/bs; 3640 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3641 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 3642 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 3643 } 3644 PetscCall(PetscFree(bindx)); 3645 3646 PetscCall(MatCreate(comm,outmat)); 3647 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 3648 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 3649 PetscCall(MatSetType(*outmat,MATBAIJ)); 3650 PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz)); 3651 PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 3652 MatPreallocateEnd(dnz,onz); 3653 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3654 } 3655 3656 /* numeric phase */ 3657 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3658 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 3659 3660 for (i=0; i<m; i++) { 3661 PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3662 Ii = i + rstart; 3663 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 3664 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3665 } 3666 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3667 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3668 PetscFunctionReturn(0); 3669 } 3670