#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
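/*
   Usage sketch (illustrative, not taken from this file): the store/retrieve
   pair above snapshots and restores the numerical values of a matrix whose
   nonzero pattern is frozen, e.g. when the same sparsity is refilled with new
   coefficients each step.  The assumption here is that new nonzero locations
   have been disallowed first:

       ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);      snapshot current values
       ...                                            modify and use the matrix
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   the snapshot is restored
*/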
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
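/*
   Worked example of the colmap built above (hypothetical numbers): with
   bs = 2 and garray = {3,7} (the global block columns present in B), the
   non-CTABLE branch stores colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3,
   i.e. one plus the first local point column of each block.  A lookup then
   recovers the local point column as colmap[gcol/bs] - 1 + gcol%bs, and a
   stored value of 0 marks "block column not present".
*/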
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
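/*
   Caller-side sketch (assumed typical usage, not part of this file): point
   entries are inserted with global indices; entries whose row is owned by
   another process are stashed above and communicated during assembly.

       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
       PetscInt    rows[2] = {0,1},cols[2] = {0,1};
       ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/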
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low  = 0;
  high = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
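/*
   Layout note with a small example (illustrative): for bs = 2, a row-oriented
   caller passes one 2x2 block as v = {a00,a01,a10,a11} (rows of the block
   contiguous), while a column-oriented caller passes {a00,a10,a01,a11}.
   MatSetValuesBlocked_SeqBAIJ_Inlined() above handles both orderings and
   stores each block column-major inside its bs2-sized slot.

       PetscScalar block[4] = {1.0,2.0,3.0,4.0};    one 2x2 block, row-major
       PetscInt    brow = 0,bcol = 0;               block (not point) indices
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/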
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
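/*
   HASH() above is multiplicative hashing with the golden-ratio fraction: the
   key is scaled by 0.6180339887, the integer part is dropped, and the
   remaining fraction indexes the table.  A worked example with hypothetical
   numbers: size = 100, key = 42 gives 42*0.6180339887 = 25.957...,
   fractional part 0.957..., so slot (PetscInt)(100*0.957...) = 95.
*/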
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
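/*
   The hash-table insertion path above is only installed when the user asks
   for it; a sketch of how it is enabled (see MatSetOption_MPIBAIJ and
   MatAssemblyEnd_MPIBAIJ below, which create the table at the first final
   assembly and then swap in the _HT routines):

       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
       ... assemble once; subsequent MatSetValues() calls use
       MatSetValues_MPIBAIJ_HT() / MatSetValuesBlocked_MPIBAIJ_HT()
*/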
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
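/*
   Usage sketch for the norms computed above (illustrative):

       PetscReal nrm;
       ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   sqrt of the sum of |a_ij|^2
       ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);           max column sum
       ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);    max row sum
*/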
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
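/*
   The Begin/End split above exists so that the communication of stashed
   off-process entries can overlap other work; a typical caller sequence
   (illustrative):

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ... independent work may be done here while the stashes are in flight
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/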
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
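/*
   Viewer sketch (assumed typical usage, not part of this file): the ASCII
   info branch above is reached, for example, via

       ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or from the command line with a -mat_view option.
*/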
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices. Include them for all rows, not just one per block row, since process 0
     does not have the information needed to reconstruct them for each row from a block row. This does require
     more communication, but still not more than the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
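/*
   The multiply kernels in this family all follow the same split: with x_d the
   locally owned part of x and x_g the ghost values gathered into a->lvec by
   the Mvctx scatter,

       y = A*x   is computed as   y = a->A * x_d + a->B * x_g

   and the scatter is started before the diagonal-block product so that
   communication overlaps computation.  The transpose variants run the
   scatter in reverse, adding off-diagonal contributions back to their
   owning processes.
*/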
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
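/*
   Usage sketch for the row access above (illustrative): rows must be locally
   owned, and each MatGetRow() must be paired with a MatRestoreRow().

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... read ncols, cols[] (global column indices), vals[]
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/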
info->mallocs = isend[4]; 1593 } else if (flag == MAT_GLOBAL_MAX) { 1594 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1595 1596 info->nz_used = irecv[0]; 1597 info->nz_allocated = irecv[1]; 1598 info->nz_unneeded = irecv[2]; 1599 info->memory = irecv[3]; 1600 info->mallocs = irecv[4]; 1601 } else if (flag == MAT_GLOBAL_SUM) { 1602 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1603 1604 info->nz_used = irecv[0]; 1605 info->nz_allocated = irecv[1]; 1606 info->nz_unneeded = irecv[2]; 1607 info->memory = irecv[3]; 1608 info->mallocs = irecv[4]; 1609 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1610 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1611 info->fill_ratio_needed = 0; 1612 info->factor_mallocs = 0; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1617 { 1618 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1619 PetscErrorCode ierr; 1620 1621 PetscFunctionBegin; 1622 switch (op) { 1623 case MAT_NEW_NONZERO_LOCATIONS: 1624 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1625 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1626 case MAT_KEEP_NONZERO_PATTERN: 1627 case MAT_NEW_NONZERO_LOCATION_ERR: 1628 MatCheckPreallocated(A,1); 1629 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1630 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1631 break; 1632 case MAT_ROW_ORIENTED: 1633 MatCheckPreallocated(A,1); 1634 a->roworiented = flg; 1635 1636 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1637 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1638 break; 1639 case MAT_NEW_DIAGONALS: 1640 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1641 break; 1642 case MAT_IGNORE_OFF_PROC_ENTRIES: 1643 a->donotstash = flg; 1644 break; 1645 case MAT_USE_HASH_TABLE: 1646 a->ht_flag = flg; 1647 a->ht_fact = 1.39; 1648 break; 1649 case MAT_SYMMETRIC: 1650 case MAT_STRUCTURALLY_SYMMETRIC: 1651 case MAT_HERMITIAN: 1652 case MAT_SUBMAT_SINGLEIS: 1653 case MAT_SYMMETRY_ETERNAL: 1654 MatCheckPreallocated(A,1); 1655 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1656 break; 1657 default: 1658 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1664 { 1665 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1666 Mat_SeqBAIJ *Aloc; 1667 Mat B; 1668 PetscErrorCode ierr; 1669 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1670 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1671 MatScalar *a; 1672 1673 PetscFunctionBegin; 1674 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1675 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1676 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1677 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1678 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1679 /* Do not know preallocation information, but must set block size */ 1680 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1681 } else { 1682 B = *matout; 1683 } 1684 1685 /* copy over the A part */ 1686 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1687 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1688 ierr = 
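/* rvals will hold the bs consecutive global point-row indices that one block row of A expands to: bs*(rstartbs+i), ..., bs*(rstartbs+i)+bs-1 */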
PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1689 1690 for (i=0; i<mbs; i++) { 1691 rvals[0] = bs*(baij->rstartbs + i); 1692 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1693 for (j=ai[i]; j<ai[i+1]; j++) { 1694 col = (baij->cstartbs+aj[j])*bs; 1695 for (k=0; k<bs; k++) { 1696 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1697 1698 col++; a += bs; 1699 } 1700 } 1701 } 1702 /* copy over the B part */ 1703 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1704 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1705 for (i=0; i<mbs; i++) { 1706 rvals[0] = bs*(baij->rstartbs + i); 1707 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1708 for (j=ai[i]; j<ai[i+1]; j++) { 1709 col = baij->garray[aj[j]]*bs; 1710 for (k=0; k<bs; k++) { 1711 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1712 col++; 1713 a += bs; 1714 } 1715 } 1716 } 1717 ierr = PetscFree(rvals);CHKERRQ(ierr); 1718 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 1721 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1722 else { 1723 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1729 { 1730 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1731 Mat a = baij->A,b = baij->B; 1732 PetscErrorCode ierr; 1733 PetscInt s1,s2,s3; 1734 1735 PetscFunctionBegin; 1736 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1737 if (rr) { 1738 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1739 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1740 /* Overlap communication with computation. */ 1741 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1742 } 1743 if (ll) { 1744 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1745 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1746 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1747 } 1748 /* scale the diagonal block */ 1749 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1750 1751 if (rr) { 1752 /* Do a scatter end and then right scale the off-diagonal block */ 1753 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1754 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1760 { 1761 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1762 PetscInt *lrows; 1763 PetscInt r, len; 1764 PetscErrorCode ierr; 1765 1766 PetscFunctionBegin; 1767 /* get locally owned rows */ 1768 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1769 /* fix right hand side if needed */ 1770 if (x && b) { 1771 const PetscScalar *xx; 1772 PetscScalar *bb; 1773 1774 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1775 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1776 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1777 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1778 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1779 } 1780 1781 /* actually zap the local rows */ 1782 /* 1783 Zero the required rows. 
If the "diagonal block" of the matrix 1784 is square and the user wishes to set the diagonal we use separate 1785 code so that MatSetValues() is not called for each diagonal allocating 1786 new memory, thus calling lots of mallocs and slowing things down. 1787 1788 */ 1789 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1790 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1791 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 1792 PetscBool cong; 1793 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 1794 if (cong) A->congruentlayouts = 1; 1795 else A->congruentlayouts = 0; 1796 } 1797 if ((diag != 0.0) && A->congruentlayouts) { 1798 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1799 } else if (diag != 0.0) { 1800 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1801 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1802 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1803 for (r = 0; r < len; ++r) { 1804 const PetscInt row = lrows[r] + A->rmap->rstart; 1805 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1806 } 1807 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1808 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1809 } else { 1810 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1811 } 1812 ierr = PetscFree(lrows);CHKERRQ(ierr); 1813 1814 /* only change matrix nonzero state if pattern was allowed to be changed */ 1815 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1816 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1817 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 1822 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1823 { 1824 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1825 PetscErrorCode ierr; 1826 PetscMPIInt n = A->rmap->n; 1827 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1828 PetscInt *lrows,*owners = A->rmap->range; 1829 PetscSFNode *rrows; 1830 PetscSF sf; 1831 const PetscScalar *xx; 1832 PetscScalar *bb,*mask; 1833 Vec xmask,lmask; 1834 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1835 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1836 PetscScalar *aa; 1837 1838 PetscFunctionBegin; 1839 /* Create SF where leaves are input rows and roots are owned rows */ 1840 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1841 for (r = 0; r < n; ++r) lrows[r] = -1; 1842 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1843 for (r = 0; r < N; ++r) { 1844 const PetscInt idx = rows[r]; 1845 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1846 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1847 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1848 } 1849 rrows[r].rank = p; 1850 rrows[r].index = rows[r] - owners[p]; 1851 } 1852 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1853 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1854 /* Collect flags for rows to be zeroed */ 1855 ierr = PetscSFReduceBegin(sf, 
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1856 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1857 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1858 /* Compress and put in row numbers */ 1859 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1860 /* zero diagonal part of matrix */ 1861 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1862 /* handle off diagonal part of matrix */ 1863 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1864 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1865 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1866 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1867 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1871 if (x) { 1872 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1875 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1876 } 1877 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1878 /* remove zeroed rows of off diagonal matrix */ 1879 for (i = 0; i < len; ++i) { 1880 row = lrows[i]; 1881 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1882 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1883 for (k = 0; k < count; ++k) { 1884 aa[0] = 0.0; 1885 aa += bs; 1886 } 1887 } 1888 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1889 for (i = 0; i < l->B->rmap->N; ++i) { 1890 row = i/bs; 1891 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1892 for (k = 0; k < bs; ++k) { 1893 col = bs*baij->j[j] + k; 1894 if (PetscAbsScalar(mask[col])) { 1895 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1896 if (x) bb[i] -= aa[0]*xx[col]; 1897 aa[0] = 0.0; 1898 } 1899 } 1900 } 1901 } 1902 if (x) { 1903 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1904 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1905 } 1906 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1907 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1908 ierr = PetscFree(lrows);CHKERRQ(ierr); 1909 1910 /* only change matrix nonzero state if pattern was allowed to be changed */ 1911 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1912 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1913 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1919 { 1920 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1929 1930 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1931 { 1932 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1933 Mat a,b,c,d; 1934 PetscBool flg; 1935 PetscErrorCode ierr; 1936 1937 PetscFunctionBegin; 1938 a = matA->A; b = matA->B; 1939 c = matB->A; d = matB->B; 1940 1941 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1942 if (flg) { 1943 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1944 } 1945 ierr = 
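/* A equals B only if every process found both its diagonal and off-diagonal blocks equal, hence the logical-AND reduction */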
MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1950 { 1951 PetscErrorCode ierr; 1952 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1953 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1954 1955 PetscFunctionBegin; 1956 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1957 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1958 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1959 } else { 1960 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1961 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1962 } 1963 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1968 { 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1977 { 1978 PetscErrorCode ierr; 1979 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1980 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1981 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1982 1983 PetscFunctionBegin; 1984 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1985 PetscFunctionReturn(0); 1986 } 1987 1988 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1989 { 1990 PetscErrorCode ierr; 1991 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1992 PetscBLASInt bnz,one=1; 1993 Mat_SeqBAIJ *x,*y; 1994 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1995 1996 PetscFunctionBegin; 1997 if (str == SAME_NONZERO_PATTERN) { 1998 PetscScalar alpha = a; 1999 x = (Mat_SeqBAIJ*)xx->A->data; 2000 y = (Mat_SeqBAIJ*)yy->A->data; 2001 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2002 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2003 x = (Mat_SeqBAIJ*)xx->B->data; 2004 y = (Mat_SeqBAIJ*)yy->B->data; 2005 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2006 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2007 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2008 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2009 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2010 } else { 2011 Mat B; 2012 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2013 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2014 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2015 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2016 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2017 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2018 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2019 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2020 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2021 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2022 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2023 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 2024 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2025 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2026 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2027 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2033 { 2034 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2035 PetscErrorCode ierr; 2036 2037 PetscFunctionBegin; 2038 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2039 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2044 { 2045 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2050 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2055 { 2056 PetscErrorCode ierr; 2057 IS iscol_local; 2058 PetscInt csize; 2059 2060 PetscFunctionBegin; 2061 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2062 if (call == MAT_REUSE_MATRIX) { 2063 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2064 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2065 } else { 2066 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2067 } 2068 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2069 if (call == MAT_INITIAL_MATRIX) { 2070 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2071 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 /* 2077 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2078 in local and then by concatenating the local matrices the end result. 2079 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 2080 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2081 */ 2082 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2083 { 2084 PetscErrorCode ierr; 2085 PetscMPIInt rank,size; 2086 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2087 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2088 Mat M,Mreuse; 2089 MatScalar *vwork,*aa; 2090 MPI_Comm comm; 2091 IS isrow_new, iscol_new; 2092 Mat_SeqBAIJ *aij; 2093 2094 PetscFunctionBegin; 2095 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2097 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2098 /* The compression and expansion should be avoided. 
It does not flag 2099 errors and may change the indices, hence it is bug-prone. */ 2100 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2101 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2102 2103 if (call == MAT_REUSE_MATRIX) { 2104 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2105 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2106 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 2109 } 2110 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2111 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2112 /* 2113 m - number of local rows 2114 n - number of columns (same on all processors) 2115 rstart - first row in new global matrix generated 2116 */ 2117 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2119 m = m/bs; 2120 n = n/bs; 2121 2122 if (call == MAT_INITIAL_MATRIX) { 2123 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2124 ii = aij->i; 2125 jj = aij->j; 2126 2127 /* 2128 Determine the number of non-zeros in the diagonal and off-diagonal 2129 portions of the matrix in order to do correct preallocation 2130 */ 2131 2132 /* first get start and end of "diagonal" columns */ 2133 if (csize == PETSC_DECIDE) { 2134 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2135 if (mglobal == n*bs) { /* square matrix */ 2136 nlocal = m; 2137 } else { 2138 nlocal = n/size + ((n % size) > rank); 2139 } 2140 } else { 2141 nlocal = csize/bs; 2142 } 2143 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2144 rstart = rend - nlocal; 2145 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2146 2147 /* next, compute all the lengths */ 2148 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2149 for (i=0; i<m; i++) { 2150 jend = ii[i+1] - ii[i]; 2151 olen = 0; 2152 dlen = 0; 2153 for (j=0; j<jend; j++) { 2154 if (*jj < rstart || *jj >= rend) olen++; 2155 else dlen++; 2156 jj++; 2157 } 2158 olens[i] = olen; 2159 dlens[i] = dlen; 2160 } 2161 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2162 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2163 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2164 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2165 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2166 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2167 } else { 2168 PetscInt ml,nl; 2169 2170 M = *newmat; 2171 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2172 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2173 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2174 /* 2175 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2176 rather than the slower MatSetValues().
2177 */ 2178 M->was_assembled = PETSC_TRUE; 2179 M->assembled = PETSC_FALSE; 2180 } 2181 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2182 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2183 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2184 ii = aij->i; 2185 jj = aij->j; 2186 aa = aij->a; 2187 for (i=0; i<m; i++) { 2188 row = rstart/bs + i; 2189 nz = ii[i+1] - ii[i]; 2190 cwork = jj; jj += nz; 2191 vwork = aa; aa += nz*bs*bs; 2192 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2193 } 2194 2195 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2196 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 *newmat = M; 2198 2199 /* save submatrix used in processor for next request */ 2200 if (call == MAT_INITIAL_MATRIX) { 2201 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2202 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2203 } 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2208 { 2209 MPI_Comm comm,pcomm; 2210 PetscInt clocal_size,nrows; 2211 const PetscInt *rows; 2212 PetscMPIInt size; 2213 IS crowp,lcolp; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2218 /* make a collective version of 'rowp' */ 2219 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2220 if (pcomm==comm) { 2221 crowp = rowp; 2222 } else { 2223 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2224 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2225 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2226 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2227 } 2228 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2229 /* make a local version of 'colp' */ 2230 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2231 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2232 if (size==1) { 2233 lcolp = colp; 2234 } else { 2235 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 /* now we just get the submatrix */ 2239 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2240 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2241 /* clean up */ 2242 if (pcomm!=comm) { 2243 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2244 } 2245 if (size>1) { 2246 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2247 } 2248 PetscFunctionReturn(0); 2249 } 2250 2251 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2252 { 2253 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2254 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2255 2256 PetscFunctionBegin; 2257 if (nghosts) *nghosts = B->nbs; 2258 if (ghosts) *ghosts = baij->garray; 2259 PetscFunctionReturn(0); 2260 } 2261 2262 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2263 { 2264 Mat B; 2265 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2266 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2267 Mat_SeqAIJ *b; 2268 PetscErrorCode ierr; 2269 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2270 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2271 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2272 2273 PetscFunctionBegin; 2274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2275 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2276 2277 /* ---------------------------------------------------------------- 2278 Tell every processor the number of nonzeros per row 2279 */ 2280 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2281 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2282 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2283 } 2284 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2285 displs = recvcounts + size; 2286 for (i=0; i<size; i++) { 2287 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2288 displs[i] = A->rmap->range[i]/bs; 2289 } 2290 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2291 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2292 #else 2293 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2294 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2295 #endif 2296 /* --------------------------------------------------------------- 2297 Create the sequential matrix of the same type as the local block diagonal 2298 */ 2299 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2300 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2301 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2302 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2303 b = (Mat_SeqAIJ*)B->data; 2304 2305 /*-------------------------------------------------------------------- 2306 Copy my part of matrix column indices over 2307 */ 2308 sendcount = ad->nz + bd->nz; 2309 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2310 a_jsendbuf = ad->j; 2311 b_jsendbuf = bd->j; 2312 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2313 cnt = 0; 2314 for (i=0; i<n; i++) { 2315 2316 /* put in lower diagonal portion */ 2317 m = bd->i[i+1] - bd->i[i]; 2318 while (m > 0) { 2319 /* is it above diagonal (in bd (compressed) numbering) */ 2320 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2321 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2322 m--; 2323 } 2324 2325 /* put in diagonal portion */ 2326 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2327 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2328 } 2329 2330 /* put in upper diagonal portion */ 2331 while (m-- > 0) { 2332 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2333 } 2334 } 2335 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2336 2337 /*-------------------------------------------------------------------- 2338 Gather all column indices to all processors 2339 */ 2340 for (i=0; i<size; i++) { 2341 recvcounts[i] = 0; 2342 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2343 recvcounts[i] += lens[j]; 2344 } 2345 } 2346 displs[0] = 0; 2347 for (i=1; i<size; i++) { 2348 displs[i] = displs[i-1] + recvcounts[i-1]; 2349 } 2350 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2351 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2352 #else 2353 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2354 #endif 2355 /*-------------------------------------------------------------------- 2356 Assemble the matrix into useable form (note numerical values not yet set) 2357 */ 
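/*
      Illustrative example (hypothetical numbers): if lens = {2,1,3} then the loop
      below yields the CSR row offsets b->i = {0,2,3,6}, while b->ilen records the
      per-row lengths so the assembly below accepts each row as completely filled.
   */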
2358 /* set the b->ilen (length of each row) values */ 2359 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2360 /* set the b->i indices */ 2361 b->i[0] = 0; 2362 for (i=1; i<=A->rmap->N/bs; i++) { 2363 b->i[i] = b->i[i-1] + lens[i-1]; 2364 } 2365 ierr = PetscFree(lens);CHKERRQ(ierr); 2366 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2367 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2369 2370 if (A->symmetric) { 2371 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2372 } else if (A->hermitian) { 2373 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2374 } else if (A->structurally_symmetric) { 2375 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2376 } 2377 *newmat = B; 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2382 { 2383 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2384 PetscErrorCode ierr; 2385 Vec bb1 = 0; 2386 2387 PetscFunctionBegin; 2388 if (flag == SOR_APPLY_UPPER) { 2389 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2394 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2395 } 2396 2397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2398 if (flag & SOR_ZERO_INITIAL_GUESS) { 2399 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2400 its--; 2401 } 2402 2403 while (its--) { 2404 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2405 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2406 2407 /* update rhs: bb1 = bb - B*x */ 2408 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2409 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2410 2411 /* local sweep */ 2412 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2413 } 2414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2415 if (flag & SOR_ZERO_INITIAL_GUESS) { 2416 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2417 its--; 2418 } 2419 while (its--) { 2420 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2421 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2422 2423 /* update rhs: bb1 = bb - B*x */ 2424 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2425 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2426 2427 /* local sweep */ 2428 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2429 } 2430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2431 if (flag & SOR_ZERO_INITIAL_GUESS) { 2432 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2433 its--; 2434 } 2435 while (its--) { 2436 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2438 2439 /* update rhs: bb1 = bb - B*x */ 2440 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2441 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2442 2443 /* local sweep */ 2444 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2445 } 2446 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2447 2448 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2453 { 2454 PetscErrorCode ierr; 2455 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2456 PetscInt N,i,*garray = aij->garray; 2457 PetscInt ib,jb,bs = A->rmap->bs; 2458 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2459 MatScalar *a_val = a_aij->a; 2460 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2461 MatScalar *b_val = b_aij->a; 2462 PetscReal *work; 2463 2464 PetscFunctionBegin; 2465 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2466 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2467 if (type == NORM_2) { 2468 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2469 for (jb=0; jb<bs; jb++) { 2470 for (ib=0; ib<bs; ib++) { 2471 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2472 a_val++; 2473 } 2474 } 2475 } 2476 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2477 for (jb=0; jb<bs; jb++) { 2478 for (ib=0; ib<bs; ib++) { 2479 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2480 b_val++; 2481 } 2482 } 2483 } 2484 } else if (type == NORM_1) { 2485 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2486 for (jb=0; jb<bs; jb++) { 2487 for (ib=0; ib<bs; ib++) { 2488 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2489 a_val++; 2490 } 2491 } 2492 } 2493 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2494 for (jb=0; jb<bs; jb++) { 2495 for (ib=0; ib<bs; ib++) { 2496 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2497 b_val++; 2498 } 2499 } 2500 } 2501 } else if (type == NORM_INFINITY) { 2502 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2503 for (jb=0; jb<bs; jb++) { 2504 for (ib=0; ib<bs; ib++) { 2505 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2506 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2507 a_val++; 2508 } 2509 } 2510 } 2511 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2512 for (jb=0; jb<bs; jb++) { 2513 for (ib=0; ib<bs; ib++) { 2514 int col = garray[b_aij->j[i]] * bs + jb; 2515 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2516 b_val++; 2517 } 2518 } 2519 } 2520 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2521 if (type == NORM_INFINITY) { 2522 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2523 } else { 2524 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2525 } 2526 ierr = PetscFree(work);CHKERRQ(ierr); 2527 if (type == NORM_2) { 2528 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2529 } 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2534 { 2535 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2540 A->factorerrortype = a->A->factorerrortype; 2541 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2542 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatShift_MPIBAIJ(Mat 
Y,PetscScalar a) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2550 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 PetscInt nonew = aij->nonew; 2557 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2565 { 2566 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2571 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2572 if (d) { 2573 PetscInt rstart; 2574 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2575 *d += rstart/A->rmap->bs; 2576 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2582 { 2583 PetscFunctionBegin; 2584 *a = ((Mat_MPIBAIJ*)A->data)->A; 2585 PetscFunctionReturn(0); 2586 } 2587 2588 /* -------------------------------------------------------------------*/ 2589 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2590 MatGetRow_MPIBAIJ, 2591 MatRestoreRow_MPIBAIJ, 2592 MatMult_MPIBAIJ, 2593 /* 4*/ MatMultAdd_MPIBAIJ, 2594 MatMultTranspose_MPIBAIJ, 2595 MatMultTransposeAdd_MPIBAIJ, 2596 0, 2597 0, 2598 0, 2599 /*10*/ 0, 2600 0, 2601 0, 2602 MatSOR_MPIBAIJ, 2603 MatTranspose_MPIBAIJ, 2604 /*15*/ MatGetInfo_MPIBAIJ, 2605 MatEqual_MPIBAIJ, 2606 MatGetDiagonal_MPIBAIJ, 2607 MatDiagonalScale_MPIBAIJ, 2608 MatNorm_MPIBAIJ, 2609 /*20*/ MatAssemblyBegin_MPIBAIJ, 2610 MatAssemblyEnd_MPIBAIJ, 2611 MatSetOption_MPIBAIJ, 2612 MatZeroEntries_MPIBAIJ, 2613 /*24*/ MatZeroRows_MPIBAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*29*/ MatSetUp_MPIBAIJ, 2619 0, 2620 0, 2621 MatGetDiagonalBlock_MPIBAIJ, 2622 0, 2623 /*34*/ MatDuplicate_MPIBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*39*/ MatAXPY_MPIBAIJ, 2629 MatCreateSubMatrices_MPIBAIJ, 2630 MatIncreaseOverlap_MPIBAIJ, 2631 MatGetValues_MPIBAIJ, 2632 MatCopy_MPIBAIJ, 2633 /*44*/ 0, 2634 MatScale_MPIBAIJ, 2635 MatShift_MPIBAIJ, 2636 0, 2637 MatZeroRowsColumns_MPIBAIJ, 2638 /*49*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*54*/ MatFDColoringCreate_MPIXAIJ, 2644 0, 2645 MatSetUnfactored_MPIBAIJ, 2646 MatPermute_MPIBAIJ, 2647 MatSetValuesBlocked_MPIBAIJ, 2648 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2649 MatDestroy_MPIBAIJ, 2650 MatView_MPIBAIJ, 2651 0, 2652 0, 2653 /*64*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*74*/ 0, 2664 MatFDColoringApply_BAIJ, 2665 0, 2666 0, 2667 0, 2668 /*79*/ 0, 2669 0, 2670 0, 2671 0, 2672 MatLoad_MPIBAIJ, 2673 /*84*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*94*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*99*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /*104*/0, 2694 MatRealPart_MPIBAIJ, 2695 MatImaginaryPart_MPIBAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 0, 2700 0, 2701 0, 2702 MatMissingDiagonal_MPIBAIJ, 2703 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2704 0, 2705 MatGetGhosts_MPIBAIJ, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0, 2712 MatGetMultiProcBlock_MPIBAIJ, 2713 /*124*/0, 2714 MatGetColumnNorms_MPIBAIJ, 2715 
MatInvertBlockDiagonal_MPIBAIJ, 2716 0, 2717 0, 2718 /*129*/ 0, 2719 0, 2720 0, 2721 0, 2722 0, 2723 /*134*/ 0, 2724 0, 2725 0, 2726 0, 2727 0, 2728 /*139*/ MatSetBlockSizes_Default, 2729 0, 2730 0, 2731 MatFDColoringSetUp_MPIXAIJ, 2732 0, 2733 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2734 }; 2735 2736 2737 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2738 2739 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2740 { 2741 PetscInt m,rstart,cstart,cend; 2742 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2743 const PetscInt *JJ =0; 2744 PetscScalar *values=0; 2745 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2750 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2751 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2752 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2753 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2754 m = B->rmap->n/bs; 2755 rstart = B->rmap->rstart/bs; 2756 cstart = B->cmap->rstart/bs; 2757 cend = B->cmap->rend/bs; 2758 2759 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2760 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2761 for (i=0; i<m; i++) { 2762 nz = ii[i+1] - ii[i]; 2763 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2764 nz_max = PetscMax(nz_max,nz); 2765 JJ = jj + ii[i]; 2766 for (j=0; j<nz; j++) { 2767 if (*JJ >= cstart) break; 2768 JJ++; 2769 } 2770 d = 0; 2771 for (; j<nz; j++) { 2772 if (*JJ++ >= cend) break; 2773 d++; 2774 } 2775 d_nnz[i] = d; 2776 o_nnz[i] = nz - d; 2777 } 2778 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2779 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2780 2781 values = (PetscScalar*)V; 2782 if (!values) { 2783 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2784 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2785 } 2786 for (i=0; i<m; i++) { 2787 PetscInt row = i + rstart; 2788 PetscInt ncols = ii[i+1] - ii[i]; 2789 const PetscInt *icols = jj + ii[i]; 2790 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2791 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2792 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2793 } else { /* block ordering does not match so we can only insert one block at a time. */ 2794 PetscInt j; 2795 for (j=0; j<ncols; j++) { 2796 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2797 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2798 } 2799 } 2800 } 2801 2802 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2803 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2804 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2805 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 /*@C 2810 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2811 (the default parallel PETSc format). 2812 2813 Collective on MPI_Comm 2814 2815 Input Parameters: 2816 + B - the matrix 2817 . bs - the block size 2818 . 
i - the indices into j for the start of each local row (starts with zero) 2819 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2820 - v - optional values in the matrix 2821 2822 Level: developer 2823 2824 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2825 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2826 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2827 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2828 block column and the second index is over columns within a block. 2829 2830 .keywords: matrix, aij, compressed row, sparse, parallel 2831 2832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2833 @*/ 2834 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2835 { 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2840 PetscValidType(B,1); 2841 PetscValidLogicalCollectiveInt(B,bs,2); 2842 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2847 { 2848 Mat_MPIBAIJ *b; 2849 PetscErrorCode ierr; 2850 PetscInt i; 2851 2852 PetscFunctionBegin; 2853 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2854 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2856 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2857 2858 if (d_nnz) { 2859 for (i=0; i<B->rmap->n/bs; i++) { 2860 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2861 } 2862 } 2863 if (o_nnz) { 2864 for (i=0; i<B->rmap->n/bs; i++) { 2865 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2866 } 2867 } 2868 2869 b = (Mat_MPIBAIJ*)B->data; 2870 b->bs2 = bs*bs; 2871 b->mbs = B->rmap->n/bs; 2872 b->nbs = B->cmap->n/bs; 2873 b->Mbs = B->rmap->N/bs; 2874 b->Nbs = B->cmap->N/bs; 2875 2876 for (i=0; i<=b->size; i++) { 2877 b->rangebs[i] = B->rmap->range[i]/bs; 2878 } 2879 b->rstartbs = B->rmap->rstart/bs; 2880 b->rendbs = B->rmap->rend/bs; 2881 b->cstartbs = B->cmap->rstart/bs; 2882 b->cendbs = B->cmap->rend/bs; 2883 2884 #if defined(PETSC_USE_CTABLE) 2885 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2886 #else 2887 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2888 #endif 2889 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2890 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2891 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2892 2893 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2894 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2895 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2896 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2897 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2898 ierr = 
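/* record b->B as a child of B so PETSc logging attributes its memory to B */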
PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2899 2900 if (!B->preallocated) { 2901 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2902 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2903 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2904 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2905 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2906 } 2907 2908 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2909 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2910 B->preallocated = PETSC_TRUE; 2911 B->was_assembled = PETSC_FALSE; 2912 B->assembled = PETSC_FALSE; 2913 PetscFunctionReturn(0); 2914 } 2915 2916 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2917 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2918 2919 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2920 { 2921 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2922 PetscErrorCode ierr; 2923 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2924 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2925 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2926 2927 PetscFunctionBegin; 2928 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2929 ii[0] = 0; 2930 for (i=0; i<M; i++) { 2931 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2932 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2933 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2934 /* remove one from count of matrix has diagonal */ 2935 for (j=id[i]; j<id[i+1]; j++) { 2936 if (jd[j] == i) {ii[i+1]--;break;} 2937 } 2938 } 2939 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2940 cnt = 0; 2941 for (i=0; i<M; i++) { 2942 for (j=io[i]; j<io[i+1]; j++) { 2943 if (garray[jo[j]] > rstart) break; 2944 jj[cnt++] = garray[jo[j]]; 2945 } 2946 for (k=id[i]; k<id[i+1]; k++) { 2947 if (jd[k] != i) { 2948 jj[cnt++] = rstart + jd[k]; 2949 } 2950 } 2951 for (; j<io[i+1]; j++) { 2952 jj[cnt++] = garray[jo[j]]; 2953 } 2954 } 2955 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 2959 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2960 2961 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2962 2963 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2964 { 2965 PetscErrorCode ierr; 2966 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2967 Mat B; 2968 Mat_MPIAIJ *b; 2969 2970 PetscFunctionBegin; 2971 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2972 2973 if (reuse == MAT_REUSE_MATRIX) { 2974 B = *newmat; 2975 } else { 2976 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2977 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2978 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2979 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2980 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2981 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2982 } 2983 b = (Mat_MPIAIJ*) B->data; 2984 2985 if (reuse == 
MAT_REUSE_MATRIX) { 2986 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2987 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2988 } else { 2989 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2990 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2991 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2992 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2993 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2994 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 } 2997 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 3000 if (reuse == MAT_INPLACE_MATRIX) { 3001 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3002 } else { 3003 *newmat = B; 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 /*MC 3009 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3010 3011 Options Database Keys: 3012 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3013 . -mat_block_size <bs> - set the blocksize used to store the matrix 3014 - -mat_use_hash_table <fact> 3015 3016 Level: beginner 3017 3018 .seealso: MatCreateMPIBAIJ 3019 M*/ 3020 3021 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3022 3023 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3024 { 3025 Mat_MPIBAIJ *b; 3026 PetscErrorCode ierr; 3027 PetscBool flg = PETSC_FALSE; 3028 3029 PetscFunctionBegin; 3030 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3031 B->data = (void*)b; 3032 3033 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3034 B->assembled = PETSC_FALSE; 3035 3036 B->insertmode = NOT_SET_VALUES; 3037 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3038 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3039 3040 /* build local table of row and column ownerships */ 3041 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3042 3043 /* build cache for off array entries formed */ 3044 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3045 3046 b->donotstash = PETSC_FALSE; 3047 b->colmap = NULL; 3048 b->garray = NULL; 3049 b->roworiented = PETSC_TRUE; 3050 3051 /* stuff used in block assembly */ 3052 b->barray = 0; 3053 3054 /* stuff used for matrix vector multiply */ 3055 b->lvec = 0; 3056 b->Mvctx = 0; 3057 3058 /* stuff for MatGetRow() */ 3059 b->rowindices = 0; 3060 b->rowvalues = 0; 3061 b->getrowactive = PETSC_FALSE; 3062 3063 /* hash table stuff */ 3064 b->ht = 0; 3065 b->hd = 0; 3066 b->ht_size = 0; 3067 b->ht_flag = PETSC_FALSE; 3068 b->ht_fact = 0; 3069 b->ht_total_ct = 0; 3070 b->ht_insert_ct = 0; 3071 3072 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 3073 b->ijonly = PETSC_FALSE; 3074 3075 3076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3079 #if defined(PETSC_HAVE_HYPRE) 3080 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3081 #endif 3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3088 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3089 3090 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3091 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 3092 if (flg) { 3093 PetscReal fact = 1.39; 3094 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3095 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3096 if (fact <= 1.0) fact = 1.39; 3097 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3098 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3099 } 3100 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3101 PetscFunctionReturn(0); 3102 } 3103 3104 /*MC 3105 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3106 3107 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3108 and MATMPIBAIJ otherwise. 3109 3110 Options Database Keys: 3111 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3112 3113 Level: beginner 3114 3115 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3116 M*/ 3117 3118 /*@C 3119 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3120 (block compressed row). For good matrix assembly performance 3121 the user should preallocate the matrix storage by setting the parameters 3122 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3123 performance can be increased by more than a factor of 50. 3124 3125 Collective on Mat 3126 3127 Input Parameters: 3128 + B - the matrix 3129 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3130 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3131 . d_nz - number of block nonzeros per block row in the diagonal portion of the local 3132 submatrix (same for all local rows) 3133 . d_nnz - array containing the number of block nonzeros in the various block rows 3134 in the diagonal portion of the local submatrix (possibly different for each block 3135 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3136 set it even if it is zero. 3137 .
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly, sized by the given factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
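/*
   A sketch matching the storage diagram in the manual page above (hypothetical
   numbers): with bs=1 and three local rows that each have 3 entries in the
   diagonal block and 9 in the off-diagonal block, either form can be used.

.vb
   PetscInt d_nnz[3] = {3,3,3}, o_nnz[3] = {9,9,9};
   MatMPIBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);  // per-row form; the scalar 0s are ignored
   // or, since all rows are alike:
   MatMPIBAIJSetPreallocation(B,1,3,NULL,9,NULL);
.ve
*/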
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly, sized by the given factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
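/*
   A one-call sketch equivalent to the MatCreate()/MatSetType()/preallocation
   sequence (the global size 100 and the preallocation values are illustrative):

.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                 5,NULL,2,NULL,&A);
.ve
*/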
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
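/*
   This routine is reached only through the generic interface; a caller never
   invokes it directly. A minimal sketch of the corresponding user call:

.vb
   Mat Acopy;
   MatDuplicate(A,MAT_COPY_VALUES,&Acopy);   // or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN
   MatDestroy(&Acopy);
.ve
*/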
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /* This code adds extra rows to make sure the number of rows is divisible by the blocksize */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every other processor's rows (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
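/*
   A loading sketch (the file name "matrix.dat" is illustrative); the block
   size may also be supplied on the command line with -matload_block_size <bs>:

.vb
   Mat         A;
   PetscViewer viewer;
   PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetType(A,MATMPIBAIJ);
   MatLoad(A,viewer);
   PetscViewerDestroy(&viewer);
.ve
*/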
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
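/*
   Sketch of enabling hash-table assembly with an explicit factor (the value
   1.5 is illustrative; factors of 1.0 or less are replaced by a default):

.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.5);
.ve
*/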
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}
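/*
   Sketch of inspecting the two local pieces of an assembled MPIBAIJ matrix;
   Ad and Ao are borrowed references and must not be destroyed by the caller:

.vb
   Mat            Ad,Ao;
   const PetscInt *colmap;
   MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);  // colmap maps Ao's block columns to global block columns
.ve
*/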
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This is a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in
   standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0-based, and the i array holds the offsets into the local j array at which each local row begins.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
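/*
   A tiny sketch on one process (the data is illustrative; bs must be 1): the
   2 x 3 matrix [ 2 -1 0 ; -1 2 -1 ] has local rows of lengths 2 and 3.

.vb
   PetscInt    i[3] = {0,2,5};        // row offsets into j and a
   PetscInt    j[5] = {0,1,0,1,2};    // column indices
   PetscScalar a[5] = {2,-1,-1,2,-1}; // values
   Mat         A;
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
*/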
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      nbs  = n;
      ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
      n    = nbs*cbs;
    } else {
      nbs = n/cbs;
    }

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check that sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
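/*
   This routine implements the MPIBAIJ back end of the generic interface; a
   sketch of the call a user would make to stack per-process sequential
   matrices into one parallel matrix (n is the local column count, or
   PETSC_DECIDE):

.vb
   Mat C;
   MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
*/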