1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscErrorCode ierr; 15 PetscInt i,*idxb = 0; 16 PetscScalar *va,*vb; 17 Vec vtmp; 18 19 PetscFunctionBegin; 20 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 21 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 22 if (idx) { 23 for (i=0; i<A->rmap->n; i++) { 24 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 25 } 26 } 27 28 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 29 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 30 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 31 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 32 33 for (i=0; i<A->rmap->n; i++) { 34 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 35 va[i] = vb[i]; 36 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 37 } 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 48 { 49 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 59 { 60 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 65 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 /* 70 Local utility routine that creates a 
mapping from the global column 71 number to the local number in the off-diagonal part of the local 72 storage of the matrix. This is done in a non scalable way since the 73 length of colmap equals the global matrix length. 74 */ 75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 76 { 77 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 78 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 79 PetscErrorCode ierr; 80 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 81 82 PetscFunctionBegin; 83 #if defined(PETSC_USE_CTABLE) 84 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 85 for (i=0; i<nbs; i++) { 86 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 87 } 88 #else 89 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 90 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 91 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 92 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 93 #endif 94 PetscFunctionReturn(0); 95 } 96 97 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 98 { \ 99 \ 100 brow = row/bs; \ 101 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 102 rmax = aimax[brow]; nrow = ailen[brow]; \ 103 bcol = col/bs; \ 104 ridx = row % bs; cidx = col % bs; \ 105 low = 0; high = nrow; \ 106 while (high-low > 3) { \ 107 t = (low+high)/2; \ 108 if (rp[t] > bcol) high = t; \ 109 else low = t; \ 110 } \ 111 for (_i=low; _i<high; _i++) { \ 112 if (rp[_i] > bcol) break; \ 113 if (rp[_i] == bcol) { \ 114 bap = ap + bs2*_i + bs*cidx + ridx; \ 115 if (addv == ADD_VALUES) *bap += value; \ 116 else *bap = value; \ 117 goto a_noinsert; \ 118 } \ 119 } \ 120 if (a->nonew == 1) goto a_noinsert; \ 121 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 122 
MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 123 N = nrow++ - 1; \ 124 /* shift up all the later entries in this row */ \ 125 for (ii=N; ii>=_i; ii--) { \ 126 rp[ii+1] = rp[ii]; \ 127 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 128 } \ 129 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 130 rp[_i] = bcol; \ 131 ap[bs2*_i + bs*cidx + ridx] = value; \ 132 a_noinsert:; \ 133 ailen[brow] = nrow; \ 134 } 135 136 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 137 { \ 138 brow = row/bs; \ 139 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 140 rmax = bimax[brow]; nrow = bilen[brow]; \ 141 bcol = col/bs; \ 142 ridx = row % bs; cidx = col % bs; \ 143 low = 0; high = nrow; \ 144 while (high-low > 3) { \ 145 t = (low+high)/2; \ 146 if (rp[t] > bcol) high = t; \ 147 else low = t; \ 148 } \ 149 for (_i=low; _i<high; _i++) { \ 150 if (rp[_i] > bcol) break; \ 151 if (rp[_i] == bcol) { \ 152 bap = ap + bs2*_i + bs*cidx + ridx; \ 153 if (addv == ADD_VALUES) *bap += value; \ 154 else *bap = value; \ 155 goto b_noinsert; \ 156 } \ 157 } \ 158 if (b->nonew == 1) goto b_noinsert; \ 159 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 160 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 161 N = nrow++ - 1; \ 162 /* shift up all the later entries in this row */ \ 163 for (ii=N; ii>=_i; ii--) { \ 164 rp[ii+1] = rp[ii]; \ 165 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 166 } \ 167 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 168 rp[_i] = bcol; \ 169 ap[bs2*_i + bs*cidx + ridx] = value; \ 170 b_noinsert:; \ 171 bilen[brow] = nrow; \ 172 } 173 174 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt 
m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 175 { 176 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 177 MatScalar value; 178 PetscBool roworiented = baij->roworiented; 179 PetscErrorCode ierr; 180 PetscInt i,j,row,col; 181 PetscInt rstart_orig=mat->rmap->rstart; 182 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 183 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 184 185 /* Some Variables required in the macro */ 186 Mat A = baij->A; 187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 188 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 189 MatScalar *aa =a->a; 190 191 Mat B = baij->B; 192 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 193 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 194 MatScalar *ba =b->a; 195 196 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 197 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 198 MatScalar *ap,*bap; 199 200 PetscFunctionBegin; 201 for (i=0; i<m; i++) { 202 if (im[i] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 205 #endif 206 if (im[i] >= rstart_orig && im[i] < rend_orig) { 207 row = im[i] - rstart_orig; 208 for (j=0; j<n; j++) { 209 if (in[j] >= cstart_orig && in[j] < cend_orig) { 210 col = in[j] - cstart_orig; 211 if (roworiented) value = v[i*n+j]; 212 else value = v[i+j*m]; 213 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 214 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 215 } else if (in[j] < 0) continue; 216 #if defined(PETSC_USE_DEBUG) 217 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 218 #endif 219 else { 220 if (mat->was_assembled) { 221 if (!baij->colmap) { 222 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 223 } 224 #if defined(PETSC_USE_CTABLE) 225 
ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 226 col = col - 1; 227 #else 228 col = baij->colmap[in[j]/bs] - 1; 229 #endif 230 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 231 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 232 col = in[j]; 233 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 234 B = baij->B; 235 b = (Mat_SeqBAIJ*)(B)->data; 236 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 237 ba =b->a; 238 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 239 else col += in[j]%bs; 240 } else col = in[j]; 241 if (roworiented) value = v[i*n+j]; 242 else value = v[i+j*m]; 243 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 244 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 245 } 246 } 247 } else { 248 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 249 if (!baij->donotstash) { 250 mat->assembled = PETSC_FALSE; 251 if (roworiented) { 252 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 253 } else { 254 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 255 } 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 263 { 264 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 265 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 266 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 267 PetscErrorCode ierr; 268 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 269 PetscBool roworiented=a->roworiented; 270 const PetscScalar *value = v; 271 MatScalar *ap,*aa = a->a,*bap; 272 273 PetscFunctionBegin; 274 rp 
= aj + ai[row]; 275 ap = aa + bs2*ai[row]; 276 rmax = imax[row]; 277 nrow = ailen[row]; 278 value = v; 279 low = 0; 280 high = nrow; 281 while (high-low > 7) { 282 t = (low+high)/2; 283 if (rp[t] > col) high = t; 284 else low = t; 285 } 286 for (i=low; i<high; i++) { 287 if (rp[i] > col) break; 288 if (rp[i] == col) { 289 bap = ap + bs2*i; 290 if (roworiented) { 291 if (is == ADD_VALUES) { 292 for (ii=0; ii<bs; ii++) { 293 for (jj=ii; jj<bs2; jj+=bs) { 294 bap[jj] += *value++; 295 } 296 } 297 } else { 298 for (ii=0; ii<bs; ii++) { 299 for (jj=ii; jj<bs2; jj+=bs) { 300 bap[jj] = *value++; 301 } 302 } 303 } 304 } else { 305 if (is == ADD_VALUES) { 306 for (ii=0; ii<bs; ii++,value+=bs) { 307 for (jj=0; jj<bs; jj++) { 308 bap[jj] += value[jj]; 309 } 310 bap += bs; 311 } 312 } else { 313 for (ii=0; ii<bs; ii++,value+=bs) { 314 for (jj=0; jj<bs; jj++) { 315 bap[jj] = value[jj]; 316 } 317 bap += bs; 318 } 319 } 320 } 321 goto noinsert2; 322 } 323 } 324 if (nonew == 1) goto noinsert2; 325 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 326 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 327 N = nrow++ - 1; high++; 328 /* shift up all the later entries in this row */ 329 for (ii=N; ii>=i; ii--) { 330 rp[ii+1] = rp[ii]; 331 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 332 } 333 if (N >= i) { 334 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 335 } 336 rp[i] = col; 337 bap = ap + bs2*i; 338 if (roworiented) { 339 for (ii=0; ii<bs; ii++) { 340 for (jj=ii; jj<bs2; jj+=bs) { 341 bap[jj] = *value++; 342 } 343 } 344 } else { 345 for (ii=0; ii<bs; ii++) { 346 for (jj=0; jj<bs; jj++) { 347 *bap++ = *value++; 348 } 349 } 350 } 351 noinsert2:; 352 ailen[row] = nrow; 353 PetscFunctionReturn(0); 354 } 355 356 /* 357 This routine should be optimized so that the block copy at ** 
Here a copy is required ** below is not needed 358 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 359 */ 360 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 361 { 362 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 363 const PetscScalar *value; 364 MatScalar *barray = baij->barray; 365 PetscBool roworiented = baij->roworiented; 366 PetscErrorCode ierr; 367 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 368 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 369 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 370 371 PetscFunctionBegin; 372 if (!barray) { 373 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 374 baij->barray = barray; 375 } 376 377 if (roworiented) stepval = (n-1)*bs; 378 else stepval = (m-1)*bs; 379 380 for (i=0; i<m; i++) { 381 if (im[i] < 0) continue; 382 #if defined(PETSC_USE_DEBUG) 383 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 384 #endif 385 if (im[i] >= rstart && im[i] < rend) { 386 row = im[i] - rstart; 387 for (j=0; j<n; j++) { 388 /* If NumCol = 1 then a copy is not required */ 389 if ((roworiented) && (n == 1)) { 390 barray = (MatScalar*)v + i*bs2; 391 } else if ((!roworiented) && (m == 1)) { 392 barray = (MatScalar*)v + j*bs2; 393 } else { /* Here a copy is required */ 394 if (roworiented) { 395 value = v + (i*(stepval+bs) + j)*bs; 396 } else { 397 value = v + (j*(stepval+bs) + i)*bs; 398 } 399 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 400 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 401 barray += bs; 402 } 403 barray -= bs2; 404 } 405 406 if (in[j] >= cstart && in[j] < cend) { 407 col = in[j] - cstart; 408 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 409 } else if (in[j] < 0) continue; 410 #if defined(PETSC_USE_DEBUG) 411 
else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 412 #endif 413 else { 414 if (mat->was_assembled) { 415 if (!baij->colmap) { 416 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 417 } 418 419 #if defined(PETSC_USE_DEBUG) 420 #if defined(PETSC_USE_CTABLE) 421 { PetscInt data; 422 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 423 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 424 } 425 #else 426 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 427 #endif 428 #endif 429 #if defined(PETSC_USE_CTABLE) 430 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 431 col = (col - 1)/bs; 432 #else 433 col = (baij->colmap[in[j]] - 1)/bs; 434 #endif 435 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 436 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 437 col = in[j]; 438 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 439 } else col = in[j]; 440 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 441 } 442 } 443 } else { 444 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 PetscFunctionReturn(0); 455 } 456 457 #define HASH_KEY 0.6180339887 458 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 459 /* #define HASH(size,key) 
((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 460 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 461 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 464 PetscBool roworiented = baij->roworiented; 465 PetscErrorCode ierr; 466 PetscInt i,j,row,col; 467 PetscInt rstart_orig=mat->rmap->rstart; 468 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 469 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 470 PetscReal tmp; 471 MatScalar **HD = baij->hd,value; 472 #if defined(PETSC_USE_DEBUG) 473 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 474 #endif 475 476 PetscFunctionBegin; 477 for (i=0; i<m; i++) { 478 #if defined(PETSC_USE_DEBUG) 479 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 480 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 481 #endif 482 row = im[i]; 483 if (row >= rstart_orig && row < rend_orig) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 if (roworiented) value = v[i*n+j]; 487 else value = v[i+j*m]; 488 /* Look up PetscInto the Hash Table */ 489 key = (row/bs)*Nbs+(col/bs)+1; 490 h1 = HASH(size,key,tmp); 491 492 493 idx = h1; 494 #if defined(PETSC_USE_DEBUG) 495 insert_ct++; 496 total_ct++; 497 if (HT[idx] != key) { 498 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == size) { 500 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 501 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 502 } 503 } 504 #else 505 if (HT[idx] != key) { 506 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 507 if (idx == size) { 508 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 509 if (idx == h1) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 #endif 513 /* A HASH table entry is found, so insert the values at the correct address */ 514 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 515 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 516 } 517 } else if (!baij->donotstash) { 518 if (roworiented) { 519 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 520 } else { 521 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 522 } 523 } 524 } 525 #if defined(PETSC_USE_DEBUG) 526 baij->ht_total_ct += total_ct; 527 baij->ht_insert_ct += insert_ct; 528 #endif 529 PetscFunctionReturn(0); 530 } 531 532 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 533 { 534 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 535 PetscBool roworiented = baij->roworiented; 536 PetscErrorCode ierr; 537 PetscInt i,j,ii,jj,row,col; 538 PetscInt rstart=baij->rstartbs; 539 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 540 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 541 PetscReal tmp; 542 MatScalar **HD = baij->hd,*baij_a; 543 const PetscScalar *v_t,*value; 544 #if defined(PETSC_USE_DEBUG) 545 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 546 #endif 547 548 PetscFunctionBegin; 549 if (roworiented) stepval = (n-1)*bs; 550 else stepval = (m-1)*bs; 551 552 for (i=0; i<m; i++) { 553 #if defined(PETSC_USE_DEBUG) 554 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 555 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 556 #endif 557 row = im[i]; 558 v_t = v + i*nbs2; 559 if (row >= rstart && row < rend) { 560 for (j=0; j<n; j++) { 
561 col = in[j]; 562 563 /* Look up into the Hash Table */ 564 key = row*Nbs+col+1; 565 h1 = HASH(size,key,tmp); 566 567 idx = h1; 568 #if defined(PETSC_USE_DEBUG) 569 total_ct++; 570 insert_ct++; 571 if (HT[idx] != key) { 572 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 573 if (idx == size) { 574 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 575 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 576 } 577 } 578 #else 579 if (HT[idx] != key) { 580 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 581 if (idx == size) { 582 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 583 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 584 } 585 } 586 #endif 587 baij_a = HD[idx]; 588 if (roworiented) { 589 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 590 /* value = v + (i*(stepval+bs)+j)*bs; */ 591 value = v_t; 592 v_t += bs; 593 if (addv == ADD_VALUES) { 594 for (ii=0; ii<bs; ii++,value+=stepval) { 595 for (jj=ii; jj<bs2; jj+=bs) { 596 baij_a[jj] += *value++; 597 } 598 } 599 } else { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=ii; jj<bs2; jj+=bs) { 602 baij_a[jj] = *value++; 603 } 604 } 605 } 606 } else { 607 value = v + j*(stepval+bs)*bs + i*bs; 608 if (addv == ADD_VALUES) { 609 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 610 for (jj=0; jj<bs; jj++) { 611 baij_a[jj] += *value++; 612 } 613 } 614 } else { 615 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 616 for (jj=0; jj<bs; jj++) { 617 baij_a[jj] = *value++; 618 } 619 } 620 } 621 } 622 } 623 } else { 624 if (!baij->donotstash) { 625 if (roworiented) { 626 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 627 } else { 628 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 } 633 #if defined(PETSC_USE_DEBUG) 634 baij->ht_total_ct += total_ct; 
635 baij->ht_insert_ct += insert_ct; 636 #endif 637 PetscFunctionReturn(0); 638 } 639 640 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 641 { 642 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 643 PetscErrorCode ierr; 644 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 645 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 646 647 PetscFunctionBegin; 648 for (i=0; i<m; i++) { 649 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 650 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 651 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 652 row = idxm[i] - bsrstart; 653 for (j=0; j<n; j++) { 654 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 655 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 656 if (idxn[j] >= bscstart && idxn[j] < bscend) { 657 col = idxn[j] - bscstart; 658 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 659 } else { 660 if (!baij->colmap) { 661 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 662 } 663 #if defined(PETSC_USE_CTABLE) 664 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 665 data--; 666 #else 667 data = baij->colmap[idxn[j]/bs]-1; 668 #endif 669 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 670 else { 671 col = data + idxn[j]%bs; 672 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 673 } 674 } 675 } 676 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 677 } 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType 
type,PetscReal *nrm) 682 { 683 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 684 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 685 PetscErrorCode ierr; 686 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 687 PetscReal sum = 0.0; 688 MatScalar *v; 689 690 PetscFunctionBegin; 691 if (baij->size == 1) { 692 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 693 } else { 694 if (type == NORM_FROBENIUS) { 695 v = amat->a; 696 nz = amat->nz*bs2; 697 for (i=0; i<nz; i++) { 698 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 699 } 700 v = bmat->a; 701 nz = bmat->nz*bs2; 702 for (i=0; i<nz; i++) { 703 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 704 } 705 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 706 *nrm = PetscSqrtReal(*nrm); 707 } else if (type == NORM_1) { /* max column sum */ 708 PetscReal *tmp,*tmp2; 709 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 710 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 711 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 712 v = amat->a; jj = amat->j; 713 for (i=0; i<amat->nz; i++) { 714 for (j=0; j<bs; j++) { 715 col = bs*(cstart + *jj) + j; /* column index */ 716 for (row=0; row<bs; row++) { 717 tmp[col] += PetscAbsScalar(*v); v++; 718 } 719 } 720 jj++; 721 } 722 v = bmat->a; jj = bmat->j; 723 for (i=0; i<bmat->nz; i++) { 724 for (j=0; j<bs; j++) { 725 col = bs*garray[*jj] + j; 726 for (row=0; row<bs; row++) { 727 tmp[col] += PetscAbsScalar(*v); v++; 728 } 729 } 730 jj++; 731 } 732 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 733 *nrm = 0.0; 734 for (j=0; j<mat->cmap->N; j++) { 735 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 736 } 737 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 738 } else if (type == NORM_INFINITY) { /* max row sum */ 739 PetscReal *sums; 740 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 741 sum = 
0.0; 742 for (j=0; j<amat->mbs; j++) { 743 for (row=0; row<bs; row++) sums[row] = 0.0; 744 v = amat->a + bs2*amat->i[j]; 745 nz = amat->i[j+1]-amat->i[j]; 746 for (i=0; i<nz; i++) { 747 for (col=0; col<bs; col++) { 748 for (row=0; row<bs; row++) { 749 sums[row] += PetscAbsScalar(*v); v++; 750 } 751 } 752 } 753 v = bmat->a + bs2*bmat->i[j]; 754 nz = bmat->i[j+1]-bmat->i[j]; 755 for (i=0; i<nz; i++) { 756 for (col=0; col<bs; col++) { 757 for (row=0; row<bs; row++) { 758 sums[row] += PetscAbsScalar(*v); v++; 759 } 760 } 761 } 762 for (row=0; row<bs; row++) { 763 if (sums[row] > sum) sum = sums[row]; 764 } 765 } 766 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 767 ierr = PetscFree(sums);CHKERRQ(ierr); 768 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 769 } 770 PetscFunctionReturn(0); 771 } 772 773 /* 774 Creates the hash table, and sets the table 775 This table is created only once. 776 If new entried need to be added to the matrix 777 then the hash table has to be destroyed and 778 recreated. 
779 */ 780 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 781 { 782 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 783 Mat A = baij->A,B=baij->B; 784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 785 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 786 PetscErrorCode ierr; 787 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 788 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 789 PetscInt *HT,key; 790 MatScalar **HD; 791 PetscReal tmp; 792 #if defined(PETSC_USE_INFO) 793 PetscInt ct=0,max=0; 794 #endif 795 796 PetscFunctionBegin; 797 if (baij->ht) PetscFunctionReturn(0); 798 799 baij->ht_size = (PetscInt)(factor*nz); 800 ht_size = baij->ht_size; 801 802 /* Allocate Memory for Hash Table */ 803 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 804 HD = baij->hd; 805 HT = baij->ht; 806 807 /* Loop Over A */ 808 for (i=0; i<a->mbs; i++) { 809 for (j=ai[i]; j<ai[i+1]; j++) { 810 row = i+rstart; 811 col = aj[j]+cstart; 812 813 key = row*Nbs + col + 1; 814 h1 = HASH(ht_size,key,tmp); 815 for (k=0; k<ht_size; k++) { 816 if (!HT[(h1+k)%ht_size]) { 817 HT[(h1+k)%ht_size] = key; 818 HD[(h1+k)%ht_size] = a->a + j*bs2; 819 break; 820 #if defined(PETSC_USE_INFO) 821 } else { 822 ct++; 823 #endif 824 } 825 } 826 #if defined(PETSC_USE_INFO) 827 if (k> max) max = k; 828 #endif 829 } 830 } 831 /* Loop Over B */ 832 for (i=0; i<b->mbs; i++) { 833 for (j=bi[i]; j<bi[i+1]; j++) { 834 row = i+rstart; 835 col = garray[bj[j]]; 836 key = row*Nbs + col + 1; 837 h1 = HASH(ht_size,key,tmp); 838 for (k=0; k<ht_size; k++) { 839 if (!HT[(h1+k)%ht_size]) { 840 HT[(h1+k)%ht_size] = key; 841 HD[(h1+k)%ht_size] = b->a + j*bs2; 842 break; 843 #if defined(PETSC_USE_INFO) 844 } else { 845 ct++; 846 #endif 847 } 848 } 849 #if defined(PETSC_USE_INFO) 850 if (k> max) max = k; 851 #endif 852 } 853 } 854 855 /* Print Summary */ 856 #if defined(PETSC_USE_INFO) 857 for (i=0,j=0; i<ht_size; 
i++) { 858 if (HT[i]) j++; 859 } 860 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 861 #endif 862 PetscFunctionReturn(0); 863 } 864 865 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 866 { 867 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 868 PetscErrorCode ierr; 869 PetscInt nstash,reallocs; 870 871 PetscFunctionBegin; 872 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 873 874 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 875 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 876 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 877 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 878 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 879 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 886 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 887 PetscErrorCode ierr; 888 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 889 PetscInt *row,*col; 890 PetscBool r1,r2,r3,other_disassembled; 891 MatScalar *val; 892 PetscMPIInt n; 893 894 PetscFunctionBegin; 895 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 896 if (!baij->donotstash && !mat->nooffprocentries) { 897 while (1) { 898 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 899 if (!flg) break; 900 901 for (i=0; i<n;) { 902 /* Now identify the consecutive vals belonging to the same row */ 903 for (j=i,rstart=row[j]; j<n; j++) { 904 if (row[j] != rstart) break; 905 } 906 if (j < n) ncols = j-i; 907 else ncols = n-i; 908 /* Now assemble all these values with a 
single function call */ 909 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 910 i = j; 911 } 912 } 913 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 914 /* Now process the block-stash. Since the values are stashed column-oriented, 915 set the roworiented flag to column oriented, and after MatSetValues() 916 restore the original flags */ 917 r1 = baij->roworiented; 918 r2 = a->roworiented; 919 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 920 921 baij->roworiented = PETSC_FALSE; 922 a->roworiented = PETSC_FALSE; 923 924 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 925 while (1) { 926 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 927 if (!flg) break; 928 929 for (i=0; i<n;) { 930 /* Now identify the consecutive vals belonging to the same row */ 931 for (j=i,rstart=row[j]; j<n; j++) { 932 if (row[j] != rstart) break; 933 } 934 if (j < n) ncols = j-i; 935 else ncols = n-i; 936 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 937 i = j; 938 } 939 } 940 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 941 942 baij->roworiented = r1; 943 a->roworiented = r2; 944 945 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 946 } 947 948 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 949 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 950 951 /* determine if any processor has disassembled, if so we must 952 also disassemble ourselfs, in order that we may reassemble. 
*/ 953 /* 954 if nonzero structure of submatrix B cannot change then we know that 955 no processor disassembled thus we can skip this stuff 956 */ 957 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 958 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 959 if (mat->was_assembled && !other_disassembled) { 960 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 961 } 962 } 963 964 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 965 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 966 } 967 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 968 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 969 970 #if defined(PETSC_USE_INFO) 971 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 972 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 973 974 baij->ht_total_ct = 0; 975 baij->ht_insert_ct = 0; 976 } 977 #endif 978 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 979 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 980 981 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 982 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 983 } 984 985 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 986 987 baij->rowvalues = 0; 988 989 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 990 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 991 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 992 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 998 #include <petscdraw.h> 999 static PetscErrorCode 
MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1000 { 1001 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1002 PetscErrorCode ierr; 1003 PetscMPIInt rank = baij->rank; 1004 PetscInt bs = mat->rmap->bs; 1005 PetscBool iascii,isdraw; 1006 PetscViewer sviewer; 1007 PetscViewerFormat format; 1008 1009 PetscFunctionBegin; 1010 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1011 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1012 if (iascii) { 1013 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1014 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1015 MatInfo info; 1016 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1017 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1018 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1019 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n", 1020 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1021 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1022 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1023 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1024 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1025 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1026 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1027 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1028 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1031 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1032 
PetscFunctionReturn(0); 1033 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1034 PetscFunctionReturn(0); 1035 } 1036 } 1037 1038 if (isdraw) { 1039 PetscDraw draw; 1040 PetscBool isnull; 1041 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1042 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1043 if (isnull) PetscFunctionReturn(0); 1044 } 1045 1046 { 1047 /* assemble the entire matrix onto first processor. */ 1048 Mat A; 1049 Mat_SeqBAIJ *Aloc; 1050 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1051 MatScalar *a; 1052 const char *matname; 1053 1054 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1055 /* Perhaps this should be the type of mat? */ 1056 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1059 } else { 1060 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1061 } 1062 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1063 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1064 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1066 1067 /* copy over the A part */ 1068 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1069 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1070 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1071 1072 for (i=0; i<mbs; i++) { 1073 rvals[0] = bs*(baij->rstartbs + i); 1074 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1075 for (j=ai[i]; j<ai[i+1]; j++) { 1076 col = (baij->cstartbs+aj[j])*bs; 1077 for (k=0; k<bs; k++) { 1078 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1079 col++; a += bs; 1080 } 1081 } 1082 } 1083 /* copy over the B part */ 1084 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1085 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1086 for (i=0; i<mbs; i++) { 1087 rvals[0] = bs*(baij->rstartbs + i); 1088 for 
(j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1089 for (j=ai[i]; j<ai[i+1]; j++) { 1090 col = baij->garray[aj[j]]*bs; 1091 for (k=0; k<bs; k++) { 1092 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1093 col++; a += bs; 1094 } 1095 } 1096 } 1097 ierr = PetscFree(rvals);CHKERRQ(ierr); 1098 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1099 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1100 /* 1101 Everyone has to call to draw the matrix since the graphics waits are 1102 synchronized across all processors that share the PetscDraw object 1103 */ 1104 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1105 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1106 if (!rank) { 1107 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1108 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1109 } 1110 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1112 ierr = MatDestroy(&A);CHKERRQ(ierr); 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1118 { 1119 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1120 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1121 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1122 PetscErrorCode ierr; 1123 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1124 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1125 int fd; 1126 PetscScalar *column_values; 1127 FILE *file; 1128 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1129 PetscInt message_count,flowcontrolcount; 1130 1131 PetscFunctionBegin; 1132 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1133 ierr = 
MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1134 nz = bs2*(A->nz + B->nz); 1135 rlen = mat->rmap->n; 1136 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1137 if (!rank) { 1138 header[0] = MAT_FILE_CLASSID; 1139 header[1] = mat->rmap->N; 1140 header[2] = mat->cmap->N; 1141 1142 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1143 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1144 /* get largest number of rows any processor has */ 1145 range = mat->rmap->range; 1146 for (i=1; i<size; i++) { 1147 rlen = PetscMax(rlen,range[i+1] - range[i]); 1148 } 1149 } else { 1150 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1151 } 1152 1153 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1154 /* compute lengths of each row */ 1155 for (i=0; i<a->mbs; i++) { 1156 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1157 } 1158 /* store the row lengths to the file */ 1159 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1160 if (!rank) { 1161 MPI_Status status; 1162 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1163 rlen = (range[1] - range[0])/bs; 1164 for (i=0; i<rlen; i++) { 1165 for (j=0; j<bs; j++) { 1166 row_lens[i*bs+j] = bs*crow_lens[i]; 1167 } 1168 } 1169 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1170 for (i=1; i<size; i++) { 1171 rlen = (range[i+1] - range[i])/bs; 1172 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1173 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1174 for (k=0; k<rlen; k++) { 1175 for (j=0; j<bs; j++) { 1176 row_lens[k*bs+j] = bs*crow_lens[k]; 1177 } 1178 } 1179 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1180 } 1181 ierr = 
PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1182 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1183 } else { 1184 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1185 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1186 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1187 } 1188 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1189 1190 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1191 information needed to make it for each row from a block row. This does require more communication but still not more than 1192 the communication needed for the nonzero values */ 1193 nzmax = nz; /* space a largest processor needs */ 1194 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1196 cnt = 0; 1197 for (i=0; i<a->mbs; i++) { 1198 pcnt = cnt; 1199 for (j=B->i[i]; j<B->i[i+1]; j++) { 1200 if ((col = garray[B->j[j]]) > cstart) break; 1201 for (l=0; l<bs; l++) { 1202 column_indices[cnt++] = bs*col+l; 1203 } 1204 } 1205 for (k=A->i[i]; k<A->i[i+1]; k++) { 1206 for (l=0; l<bs; l++) { 1207 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1208 } 1209 } 1210 for (; j<B->i[i+1]; j++) { 1211 for (l=0; l<bs; l++) { 1212 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1213 } 1214 } 1215 len = cnt - pcnt; 1216 for (k=1; k<bs; k++) { 1217 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1218 cnt += len; 1219 } 1220 } 1221 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1222 1223 /* store the columns to the file */ 1224 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1225 if (!rank) { 1226 MPI_Status status; 1227 ierr = 
PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1228 for (i=1; i<size; i++) { 1229 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1230 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1231 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1242 1243 /* load up the numerical values */ 1244 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1245 cnt = 0; 1246 for (i=0; i<a->mbs; i++) { 1247 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1248 for (j=B->i[i]; j<B->i[i+1]; j++) { 1249 if (garray[B->j[j]] > cstart) break; 1250 for (l=0; l<bs; l++) { 1251 for (ll=0; ll<bs; ll++) { 1252 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1253 } 1254 } 1255 cnt += bs; 1256 } 1257 for (k=A->i[i]; k<A->i[i+1]; k++) { 1258 for (l=0; l<bs; l++) { 1259 for (ll=0; ll<bs; ll++) { 1260 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1261 } 1262 } 1263 cnt += bs; 1264 } 1265 for (; j<B->i[i+1]; j++) { 1266 for (l=0; l<bs; l++) { 1267 for (ll=0; ll<bs; ll++) { 1268 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1269 } 1270 } 1271 cnt += bs; 1272 } 1273 cnt += (bs-1)*rlen; 1274 } 1275 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt 
= %D nz = %D",cnt,nz); 1276 1277 /* store the column values to the file */ 1278 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1279 if (!rank) { 1280 MPI_Status status; 1281 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1282 for (i=1; i<size; i++) { 1283 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1284 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1285 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1286 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1287 } 1288 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1289 } else { 1290 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1291 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1292 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1293 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(column_values);CHKERRQ(ierr); 1296 1297 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1298 if (file) { 1299 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1305 { 1306 PetscErrorCode ierr; 1307 PetscBool iascii,isdraw,issocket,isbinary; 1308 1309 PetscFunctionBegin; 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1311 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1312 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1313 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1314 if (iascii || isdraw || issocket) { 1315 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1316 } else if (isbinary) { 1317 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1323 { 1324 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 #if defined(PETSC_USE_LOG) 1329 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1330 #endif 1331 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1332 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1333 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1334 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1335 #if defined(PETSC_USE_CTABLE) 1336 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1337 #else 1338 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1339 #endif 1340 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1341 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1342 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1343 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1344 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1345 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1346 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1347 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1348 1349 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1350 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1351 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1352 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1353 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1354 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1355 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1356 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1357 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1358 #if defined(PETSC_HAVE_HYPRE) 1359 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 1360 #endif 1361 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 1362 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1367 { 1368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1369 PetscErrorCode ierr; 1370 PetscInt nt; 1371 1372 PetscFunctionBegin; 1373 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1374 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1375 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1376 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1377 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1378 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1379 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1380 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1385 { 1386 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1391 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 
1392 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1393 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 /* do nondiagonal part */ 1404 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1405 /* do local part */ 1406 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1407 /* add partial results together */ 1408 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1409 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1414 { 1415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 /* do nondiagonal part */ 1420 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1421 /* do local part */ 1422 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1423 /* add partial results together */ 1424 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1425 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 /* 1430 This only works correctly for square matrices where the subblock A->A is the 1431 diagonal block 1432 */ 1433 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1434 { 1435 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1440 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 
/* Scales the whole matrix by aa: delegates to the sequential diagonal (A) and off-diagonal (B) parts. */
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Returns one locally-owned row (global indices/values) by merging the row of the
   diagonal part A with the row of the off-diagonal part B, in increasing global
   column order.  Row must be owned by this rank; pair with MatRestoreRow_MPIBAIJ.
   Uses the shared rowvalues/rowindices scratch buffers (sized on first use for
   the longest local row), hence the getrowactive re-entrancy guard.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* NULL out the request pointers the caller did not ask for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Releases the re-entrancy guard taken by MatGetRow_MPIBAIJ (buffers are reused, not freed). */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Zeroes all stored values (nonzero pattern kept) in both local parts. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Gathers matrix statistics: sums the local A and B contributions into isend[],
   then returns them as-is (MAT_LOCAL) or reduced over the communicator with
   MAX or SUM.  Factor fill ratios are zeroed (no parallel LU/ILU/Cholesky here).
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   Handles matrix options: pattern/symmetry options are forwarded to the local
   parts; stash and hash-table behavior is recorded locally; unknown options error.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    /* default hash-table oversizing factor used by MatCreateHashTable_MPIBAIJ_Private */
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    /* symmetry flags only affect the (square) diagonal part */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   Builds the transpose by re-inserting every bs x bs block of the local A and B
   parts with MatSetValues_MPIBAIJ at swapped (column,row) positions.  For
   MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a new matrix is created (only the block
   size is known for preallocation); MAT_INPLACE_MATRIX merges the result back
   into A via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals holds the bs consecutive global row indices of block row i */
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the block as a row of B (i.e. transposed) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Diagonal scaling mat = diag(ll)*mat*diag(rr): the right-scatter of rr into
   lvec is overlapped with left-scaling B and scaling the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Zeroes the locally-owned rows listed in rows[] (global numbering), optionally
   placing diag on the diagonal and fixing b so that b = diag*x on those rows.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* fast path: diagonal entries live in the square diagonal block */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: insert diagonal entries one at a time via MatSetValues */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Zeroes both the rows AND columns listed in rows[] (global numbering); uses a
   PetscSF to route the requested global rows to their owning ranks.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt          bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n;
++r) if (lrows[r] >= 0) lrows[len++] = r; 1841 /* zero diagonal part of matrix */ 1842 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1843 /* handle off diagonal part of matrix */ 1844 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1845 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1846 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1847 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1848 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1849 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1850 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1851 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1852 if (x) { 1853 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1854 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1855 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1856 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1857 } 1858 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1859 /* remove zeroed rows of off diagonal matrix */ 1860 for (i = 0; i < len; ++i) { 1861 row = lrows[i]; 1862 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1863 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1864 for (k = 0; k < count; ++k) { 1865 aa[0] = 0.0; 1866 aa += bs; 1867 } 1868 } 1869 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1870 for (i = 0; i < l->B->rmap->N; ++i) { 1871 row = i/bs; 1872 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1873 for (k = 0; k < bs; ++k) { 1874 col = bs*baij->j[j] + k; 1875 if (PetscAbsScalar(mask[col])) { 1876 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1877 if (x) bb[i] -= aa[0]*xx[col]; 1878 aa[0] = 0.0; 1879 } 1880 } 1881 } 1882 } 1883 if (x) { 1884 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1885 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1886 } 1887 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1888 ierr 
= VecDestroy(&lmask);CHKERRQ(ierr); 1889 ierr = PetscFree(lrows);CHKERRQ(ierr); 1890 1891 /* only change matrix nonzero state if pattern was allowed to be changed */ 1892 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1893 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1894 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1900 { 1901 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1910 1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1912 { 1913 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1914 Mat a,b,c,d; 1915 PetscBool flg; 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 a = matA->A; b = matA->B; 1920 c = matB->A; d = matB->B; 1921 1922 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1923 if (flg) { 1924 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1925 } 1926 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1931 { 1932 PetscErrorCode ierr; 1933 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1934 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1935 1936 PetscFunctionBegin; 1937 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1938 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1939 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1940 } else { 1941 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1942 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1943 } 1944 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1949 { 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1954 PetscFunctionReturn(0); 1955 } 1956 1957 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1958 { 1959 PetscErrorCode ierr; 1960 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1961 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1962 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1963 1964 PetscFunctionBegin; 1965 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1966 PetscFunctionReturn(0); 1967 } 1968 1969 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1970 { 1971 PetscErrorCode ierr; 1972 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1973 PetscBLASInt bnz,one=1; 1974 Mat_SeqBAIJ *x,*y; 1975 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1976 1977 PetscFunctionBegin; 1978 if (str == SAME_NONZERO_PATTERN) { 1979 PetscScalar alpha = a; 1980 x = (Mat_SeqBAIJ*)xx->A->data; 1981 y = (Mat_SeqBAIJ*)yy->A->data; 1982 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1983 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1984 x = (Mat_SeqBAIJ*)xx->B->data; 1985 y = (Mat_SeqBAIJ*)yy->B->data; 1986 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1987 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1988 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1989 } else if (str == SUBSET_NONZERO_PATTERN) { 
/* nonzeros of X is a subset of Y's */ 1990 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1991 } else { 1992 Mat B; 1993 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1994 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1995 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1996 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1997 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1998 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1999 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2000 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2001 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2002 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2003 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2004 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 2005 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2006 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2007 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2008 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2014 { 2015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2020 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2036 { 2037 PetscErrorCode ierr; 2038 IS iscol_local; 2039 
PetscInt csize; 2040 2041 PetscFunctionBegin; 2042 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2043 if (call == MAT_REUSE_MATRIX) { 2044 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2045 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2046 } else { 2047 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2048 } 2049 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2050 if (call == MAT_INITIAL_MATRIX) { 2051 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2052 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2053 } 2054 PetscFunctionReturn(0); 2055 } 2056 2057 /* 2058 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2059 in local and then by concatenating the local matrices the end result. 2060 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 2061 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2062 */ 2063 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2064 { 2065 PetscErrorCode ierr; 2066 PetscMPIInt rank,size; 2067 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2068 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2069 Mat M,Mreuse; 2070 MatScalar *vwork,*aa; 2071 MPI_Comm comm; 2072 IS isrow_new, iscol_new; 2073 Mat_SeqBAIJ *aij; 2074 2075 PetscFunctionBegin; 2076 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2077 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2079 /* The compression and expansion should be avoided. 
Doesn't point 2080 out errors, might change the indices, hence buggey */ 2081 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2082 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2083 2084 if (call == MAT_REUSE_MATRIX) { 2085 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2086 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2087 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 2088 } else { 2089 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 2090 } 2091 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2092 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else { 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2130 for (i=0; i<m; i++) { 2131 jend = ii[i+1] - ii[i]; 2132 olen = 0; 2133 dlen = 0; 2134 for (j=0; j<jend; j++) { 2135 if (*jj < rstart || *jj >= rend) olen++; 2136 else dlen++; 2137 jj++; 2138 } 2139 olens[i] = olen; 2140 dlens[i] = dlen; 2141 } 2142 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2143 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2144 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2145 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2146 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 
2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz*bs*bs; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2189 { 2190 MPI_Comm comm,pcomm; 2191 PetscInt clocal_size,nrows; 2192 const PetscInt *rows; 2193 PetscMPIInt size; 2194 IS crowp,lcolp; 2195 PetscErrorCode ierr; 2196 2197 PetscFunctionBegin; 2198 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2199 /* make a collective version of 'rowp' */ 2200 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2201 if (pcomm==comm) { 2202 crowp = rowp; 2203 } else { 2204 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2205 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2206 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2207 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2208 } 2209 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2210 /* make a local version of 'colp' */ 2211 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2212 ierr = 
MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2213 if (size==1) { 2214 lcolp = colp; 2215 } else { 2216 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2217 } 2218 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2219 /* now we just get the submatrix */ 2220 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2221 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2222 /* clean up */ 2223 if (pcomm!=comm) { 2224 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2225 } 2226 if (size>1) { 2227 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2233 { 2234 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2235 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2236 2237 PetscFunctionBegin; 2238 if (nghosts) *nghosts = B->nbs; 2239 if (ghosts) *ghosts = baij->garray; 2240 PetscFunctionReturn(0); 2241 } 2242 2243 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2244 { 2245 Mat B; 2246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2247 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2248 Mat_SeqAIJ *b; 2249 PetscErrorCode ierr; 2250 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2251 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2252 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2253 2254 PetscFunctionBegin; 2255 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2256 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2257 2258 /* ---------------------------------------------------------------- 2259 Tell every processor the number of nonzeros per row 2260 */ 2261 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2262 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2263 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + 
bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2264 } 2265 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2266 displs = recvcounts + size; 2267 for (i=0; i<size; i++) { 2268 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2269 displs[i] = A->rmap->range[i]/bs; 2270 } 2271 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2272 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2273 #else 2274 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2275 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2276 #endif 2277 /* --------------------------------------------------------------- 2278 Create the sequential matrix of the same type as the local block diagonal 2279 */ 2280 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2281 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2282 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2283 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2284 b = (Mat_SeqAIJ*)B->data; 2285 2286 /*-------------------------------------------------------------------- 2287 Copy my part of matrix column indices over 2288 */ 2289 sendcount = ad->nz + bd->nz; 2290 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2291 a_jsendbuf = ad->j; 2292 b_jsendbuf = bd->j; 2293 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2294 cnt = 0; 2295 for (i=0; i<n; i++) { 2296 2297 /* put in lower diagonal portion */ 2298 m = bd->i[i+1] - bd->i[i]; 2299 while (m > 0) { 2300 /* is it above diagonal (in bd (compressed) numbering) */ 2301 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2302 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2303 m--; 2304 } 2305 2306 /* put in diagonal portion */ 2307 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2308 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2309 } 2310 2311 /* put in upper 
diagonal portion */ 2312 while (m-- > 0) { 2313 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2314 } 2315 } 2316 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2317 2318 /*-------------------------------------------------------------------- 2319 Gather all column indices to all processors 2320 */ 2321 for (i=0; i<size; i++) { 2322 recvcounts[i] = 0; 2323 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2324 recvcounts[i] += lens[j]; 2325 } 2326 } 2327 displs[0] = 0; 2328 for (i=1; i<size; i++) { 2329 displs[i] = displs[i-1] + recvcounts[i-1]; 2330 } 2331 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2332 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2333 #else 2334 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2335 #endif 2336 /*-------------------------------------------------------------------- 2337 Assemble the matrix into useable form (note numerical values not yet set) 2338 */ 2339 /* set the b->ilen (length of each row) values */ 2340 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2341 /* set the b->i indices */ 2342 b->i[0] = 0; 2343 for (i=1; i<=A->rmap->N/bs; i++) { 2344 b->i[i] = b->i[i-1] + lens[i-1]; 2345 } 2346 ierr = PetscFree(lens);CHKERRQ(ierr); 2347 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2348 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2349 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2350 2351 if (A->symmetric) { 2352 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2353 } else if (A->hermitian) { 2354 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2355 } else if (A->structurally_symmetric) { 2356 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2357 } 2358 *newmat = B; 
2359 PetscFunctionReturn(0); 2360 } 2361 2362 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2363 { 2364 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2365 PetscErrorCode ierr; 2366 Vec bb1 = 0; 2367 2368 PetscFunctionBegin; 2369 if (flag == SOR_APPLY_UPPER) { 2370 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2375 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2376 } 2377 2378 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2379 if (flag & SOR_ZERO_INITIAL_GUESS) { 2380 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2381 its--; 2382 } 2383 2384 while (its--) { 2385 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2386 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2387 2388 /* update rhs: bb1 = bb - B*x */ 2389 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2390 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2391 2392 /* local sweep */ 2393 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2394 } 2395 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2396 if (flag & SOR_ZERO_INITIAL_GUESS) { 2397 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2398 its--; 2399 } 2400 while (its--) { 2401 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2402 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2403 2404 /* update rhs: bb1 = bb - B*x */ 2405 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2406 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2407 2408 /* local sweep */ 2409 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2410 } 2411 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2412 if (flag & SOR_ZERO_INITIAL_GUESS) { 2413 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2414 its--; 2415 } 2416 while (its--) { 2417 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2418 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2419 2420 /* update rhs: bb1 = bb - B*x */ 2421 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2422 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2423 2424 /* local sweep */ 2425 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2426 } 2427 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2428 2429 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2434 { 2435 PetscErrorCode ierr; 2436 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2437 PetscInt N,i,*garray = aij->garray; 2438 PetscInt ib,jb,bs = A->rmap->bs; 2439 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2440 MatScalar *a_val = a_aij->a; 2441 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2442 MatScalar *b_val = b_aij->a; 2443 PetscReal *work; 2444 2445 PetscFunctionBegin; 2446 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2447 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2448 if (type == NORM_2) { 2449 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2450 for (jb=0; jb<bs; jb++) { 2451 for (ib=0; ib<bs; ib++) { 2452 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2453 a_val++; 2454 } 2455 } 2456 } 2457 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2458 for (jb=0; jb<bs; jb++) { 2459 for (ib=0; ib<bs; ib++) { 
2460 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2461 b_val++; 2462 } 2463 } 2464 } 2465 } else if (type == NORM_1) { 2466 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2467 for (jb=0; jb<bs; jb++) { 2468 for (ib=0; ib<bs; ib++) { 2469 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2470 a_val++; 2471 } 2472 } 2473 } 2474 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2475 for (jb=0; jb<bs; jb++) { 2476 for (ib=0; ib<bs; ib++) { 2477 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2478 b_val++; 2479 } 2480 } 2481 } 2482 } else if (type == NORM_INFINITY) { 2483 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2484 for (jb=0; jb<bs; jb++) { 2485 for (ib=0; ib<bs; ib++) { 2486 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2487 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2488 a_val++; 2489 } 2490 } 2491 } 2492 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2493 for (jb=0; jb<bs; jb++) { 2494 for (ib=0; ib<bs; ib++) { 2495 int col = garray[b_aij->j[i]] * bs + jb; 2496 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2497 b_val++; 2498 } 2499 } 2500 } 2501 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2502 if (type == NORM_INFINITY) { 2503 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2504 } else { 2505 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2506 } 2507 ierr = PetscFree(work);CHKERRQ(ierr); 2508 if (type == NORM_2) { 2509 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2510 } 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2515 { 2516 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2521 A->factorerrortype = a->A->factorerrortype; 2522 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2523 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2528 { 2529 PetscErrorCode ierr; 2530 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2531 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2532 2533 PetscFunctionBegin; 2534 if (!Y->preallocated) { 2535 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2536 } else if (!aij->nz) { 2537 PetscInt nonew = aij->nonew; 2538 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2539 aij->nonew = nonew; 2540 } 2541 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2546 { 2547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2548 PetscErrorCode ierr; 2549 2550 PetscFunctionBegin; 2551 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2552 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2553 if (d) { 2554 PetscInt rstart; 2555 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2556 *d += rstart/A->rmap->bs; 2557 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2563 { 2564 PetscFunctionBegin; 2565 *a = ((Mat_MPIBAIJ*)A->data)->A; 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /* -------------------------------------------------------------------*/ 2570 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2571 MatGetRow_MPIBAIJ, 2572 MatRestoreRow_MPIBAIJ, 2573 MatMult_MPIBAIJ, 2574 /* 4*/ MatMultAdd_MPIBAIJ, 2575 MatMultTranspose_MPIBAIJ, 2576 MatMultTransposeAdd_MPIBAIJ, 2577 0, 2578 0, 2579 0, 2580 /*10*/ 0, 2581 0, 2582 0, 2583 MatSOR_MPIBAIJ, 2584 
MatTranspose_MPIBAIJ, 2585 /*15*/ MatGetInfo_MPIBAIJ, 2586 MatEqual_MPIBAIJ, 2587 MatGetDiagonal_MPIBAIJ, 2588 MatDiagonalScale_MPIBAIJ, 2589 MatNorm_MPIBAIJ, 2590 /*20*/ MatAssemblyBegin_MPIBAIJ, 2591 MatAssemblyEnd_MPIBAIJ, 2592 MatSetOption_MPIBAIJ, 2593 MatZeroEntries_MPIBAIJ, 2594 /*24*/ MatZeroRows_MPIBAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*29*/ MatSetUp_MPIBAIJ, 2600 0, 2601 0, 2602 MatGetDiagonalBlock_MPIBAIJ, 2603 0, 2604 /*34*/ MatDuplicate_MPIBAIJ, 2605 0, 2606 0, 2607 0, 2608 0, 2609 /*39*/ MatAXPY_MPIBAIJ, 2610 MatCreateSubMatrices_MPIBAIJ, 2611 MatIncreaseOverlap_MPIBAIJ, 2612 MatGetValues_MPIBAIJ, 2613 MatCopy_MPIBAIJ, 2614 /*44*/ 0, 2615 MatScale_MPIBAIJ, 2616 MatShift_MPIBAIJ, 2617 0, 2618 MatZeroRowsColumns_MPIBAIJ, 2619 /*49*/ 0, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*54*/ MatFDColoringCreate_MPIXAIJ, 2625 0, 2626 MatSetUnfactored_MPIBAIJ, 2627 MatPermute_MPIBAIJ, 2628 MatSetValuesBlocked_MPIBAIJ, 2629 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2630 MatDestroy_MPIBAIJ, 2631 MatView_MPIBAIJ, 2632 0, 2633 0, 2634 /*64*/ 0, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*74*/ 0, 2645 MatFDColoringApply_BAIJ, 2646 0, 2647 0, 2648 0, 2649 /*79*/ 0, 2650 0, 2651 0, 2652 0, 2653 MatLoad_MPIBAIJ, 2654 /*84*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*89*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*94*/ 0, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*99*/ 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*104*/0, 2675 MatRealPart_MPIBAIJ, 2676 MatImaginaryPart_MPIBAIJ, 2677 0, 2678 0, 2679 /*109*/0, 2680 0, 2681 0, 2682 0, 2683 MatMissingDiagonal_MPIBAIJ, 2684 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2685 0, 2686 MatGetGhosts_MPIBAIJ, 2687 0, 2688 0, 2689 /*119*/0, 2690 0, 2691 0, 2692 0, 2693 MatGetMultiProcBlock_MPIBAIJ, 2694 /*124*/0, 2695 MatGetColumnNorms_MPIBAIJ, 2696 MatInvertBlockDiagonal_MPIBAIJ, 2697 0, 2698 0, 2699 /*129*/ 0, 2700 0, 2701 0, 2702 0, 2703 0, 2704 /*134*/ 0, 2705 0, 2706 0, 2707 0, 2708 0, 
2709 /*139*/ MatSetBlockSizes_Default, 2710 0, 2711 0, 2712 MatFDColoringSetUp_MPIXAIJ, 2713 0, 2714 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2715 }; 2716 2717 2718 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2719 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2720
/* MatMPIBAIJSetPreallocationCSR_MPIBAIJ - preallocates B from block-CSR arrays (ii,jj,V):
   for each local block row it counts how many block columns fall inside [cstart,cend)
   (diagonal part) versus outside (off-diagonal part), preallocates accordingly, then
   inserts the optional values either one block row at a time (column-oriented input)
   or one block at a time (row-oriented input). */
2721 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2722 { 2723 PetscInt m,rstart,cstart,cend; 2724 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2725 const PetscInt *JJ =0; 2726 PetscScalar *values=0; 2727 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2732 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2733 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2734 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2735 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2736 m = B->rmap->n/bs; 2737 rstart = B->rmap->rstart/bs; 2738 cstart = B->cmap->rstart/bs; 2739 cend = B->cmap->rend/bs; 2740 2741 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2742 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2743 for (i=0; i<m; i++) { 2744 nz = ii[i+1] - ii[i]; 2745 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2746 nz_max = PetscMax(nz_max,nz); 2747 dlen = 0; 2748 olen = 0; 2749 JJ = jj + ii[i]; 2750 for (j=0; j<nz; j++) { 2751 if (*JJ < cstart || *JJ >= cend) olen++; 2752 else dlen++; 2753 JJ++; 2754 } 2755 d_nnz[i] = dlen; 2756 o_nnz[i] = olen; 2757 } 2758 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2759 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2760 2761 values = (PetscScalar*)V; 2762 if (!values) { 2763 ierr = 
PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2764 }
/* Insert the rows: when V is NULL a zero-filled scratch block buffer is used so the
   nonzero pattern is still established. */
2765 for (i=0; i<m; i++) { 2766 PetscInt row = i + rstart; 2767 PetscInt ncols = ii[i+1] - ii[i]; 2768 const PetscInt *icols = jj + ii[i]; 2769 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2770 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2771 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2772 } else { /* block ordering does not match so we can only insert one block at a time. */ 2773 PetscInt j; 2774 for (j=0; j<ncols; j++) { 2775 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2776 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2777 } 2778 } 2779 } 2780 2781 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2782 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 /*@C 2789 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2790 (the default parallel PETSc format). 2791 2792 Collective on MPI_Comm 2793 2794 Input Parameters: 2795 + B - the matrix 2796 . bs - the block size 2797 . i - the indices into j for the start of each local row (starts with zero) 2798 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2799 - v - optional values in the matrix 2800 2801 Level: developer 2802 2803 Notes: 2804 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2805 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2806 over rows within a block and the last index is over columns within a block row. 
Fortran programs will likely set 2807 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2808 block column and the second index is over columns within a block. 2809 2810 .keywords: matrix, aij, compressed row, sparse, parallel 2811 2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2813 @*/
/* Public entry point: dispatches to the type-specific implementation registered as
   "MatMPIBAIJSetPreallocationCSR_C". */
2814 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2815 { 2816 PetscErrorCode ierr; 2817 2818 PetscFunctionBegin; 2819 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2820 PetscValidType(B,1); 2821 PetscValidLogicalCollectiveInt(B,bs,2); 2822 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825
/* MatMPIBAIJSetPreallocation_MPIBAIJ - implementation behind MatMPIBAIJSetPreallocation():
   validates the per-row nnz arrays, records the block-size-derived bookkeeping (mbs, nbs,
   rangebs, ...), frees any stale colmap/garray/scatter state, and (re)creates the
   sequential diagonal (A) and off-diagonal (B) blocks with the requested preallocation. */
2826 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2827 { 2828 Mat_MPIBAIJ *b; 2829 PetscErrorCode ierr; 2830 PetscInt i; 2831 PetscMPIInt size; 2832 2833 PetscFunctionBegin; 2834 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2835 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2836 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2838 2839 if (d_nnz) { 2840 for (i=0; i<B->rmap->n/bs; i++) { 2841 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2842 } 2843 } 2844 if (o_nnz) { 2845 for (i=0; i<B->rmap->n/bs; i++) { 2846 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2847 } 2848 } 2849 2850 b = (Mat_MPIBAIJ*)B->data; 2851 b->bs2 = bs*bs; 2852 b->mbs = B->rmap->n/bs; 2853 b->nbs = 
B->cmap->n/bs; 2854 b->Mbs = B->rmap->N/bs; 2855 b->Nbs = B->cmap->N/bs; 2856 2857 for (i=0; i<=b->size; i++) { 2858 b->rangebs[i] = B->rmap->range[i]/bs; 2859 } 2860 b->rstartbs = B->rmap->rstart/bs; 2861 b->rendbs = B->rmap->rend/bs; 2862 b->cstartbs = B->cmap->rstart/bs; 2863 b->cendbs = B->cmap->rend/bs; 2864
/* Drop any state left over from a previous preallocation/assembly cycle. */
2865 #if defined(PETSC_USE_CTABLE) 2866 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2867 #else 2868 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2869 #endif 2870 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2871 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2872 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2873 2874 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2875 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2876 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2877 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2878 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2879 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2880 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2881 2882 if (!B->preallocated) { 2883 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2884 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2885 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2887 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2888 } 2889 2890 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2891 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2892 B->preallocated = PETSC_TRUE; 2893 B->was_assembled = PETSC_FALSE; 2894 B->assembled = PETSC_FALSE; 2895 PetscFunctionReturn(0); 2896 } 2897 2898 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2899 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2900
/* MatConvert_MPIBAIJ_MPIAdj - builds an MPIAdj adjacency graph over block rows/columns
   from the nonzero block structure of B, merging the diagonal and off-diagonal parts in
   global column order and omitting each block row's own diagonal entry. */
2901 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2902 { 2903 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2904 PetscErrorCode ierr; 2905 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2906 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2907 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2908 2909 PetscFunctionBegin; 2910 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2911 ii[0] = 0; 2912 for (i=0; i<M; i++) { 2913 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2914 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2915 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2916 /* remove one from count of matrix has diagonal */ 2917 for (j=id[i]; j<id[i+1]; j++) { 2918 if 
(jd[j] == i) {ii[i+1]--;break;} 2919 } 2920 } 2921 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2922 cnt = 0;
/* Emit each block row as: off-diagonal columns left of the diagonal block, then
   diagonal-block columns (skipping the diagonal itself), then the remaining
   off-diagonal columns -- yielding globally sorted column indices. */
2923 for (i=0; i<M; i++) { 2924 for (j=io[i]; j<io[i+1]; j++) { 2925 if (garray[jo[j]] > rstart) break; 2926 jj[cnt++] = garray[jo[j]]; 2927 } 2928 for (k=id[i]; k<id[i+1]; k++) { 2929 if (jd[k] != i) { 2930 jj[cnt++] = rstart + jd[k]; 2931 } 2932 } 2933 for (; j<io[i+1]; j++) { 2934 jj[cnt++] = garray[jo[j]]; 2935 } 2936 } 2937 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2938 PetscFunctionReturn(0); 2939 } 2940 2941 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2942 2943 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2944
/* MatConvert_MPIBAIJ_MPIAIJ - converts A to MPIAIJ by converting the sequential A and B
   blocks to SeqAIJ. For the non-reuse path A is first disassembled (off-diagonal block
   expanded back to global column numbering) and reassembled afterwards. */
2945 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2946 { 2947 PetscErrorCode ierr; 2948 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2949 Mat B; 2950 Mat_MPIAIJ *b; 2951 2952 PetscFunctionBegin; 2953 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2954 2955 if (reuse == MAT_REUSE_MATRIX) { 2956 B = *newmat; 2957 } else { 2958 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2959 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2960 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2961 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2962 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2963 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2964 } 2965 b = (Mat_MPIAIJ*) B->data; 2966 2967 if (reuse == MAT_REUSE_MATRIX) { 2968 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2969 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2970 } else { 2971 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2972 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2973 ierr = 
MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2974 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2975 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2978 } 2979 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2980 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 2982 if (reuse == MAT_INPLACE_MATRIX) { 2983 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2984 } else { 2985 *newmat = B; 2986 } 2987 PetscFunctionReturn(0); 2988 } 2989 2990 /*MC 2991 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2992 2993 Options Database Keys: 2994 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2995 . -mat_block_size <bs> - set the blocksize used to store the matrix 2996 - -mat_use_hash_table <fact> 2997 2998 Level: beginner 2999 3000 .seealso: MatCreateMPIBAIJ 3001 M*/ 3002 3003 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3004 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 3005
/* MatCreate_MPIBAIJ - constructor for MATMPIBAIJ: allocates the implementation struct,
   installs the MatOps table, initializes per-rank bookkeeping and the value stash, and
   registers the type-specific methods (conversions, preallocation, ...) on the object. */
3006 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3007 { 3008 Mat_MPIBAIJ *b; 3009 PetscErrorCode ierr; 3010 PetscBool flg = PETSC_FALSE; 3011 3012 PetscFunctionBegin; 3013 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3014 B->data = (void*)b; 3015 3016 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3017 B->assembled = PETSC_FALSE; 3018 3019 B->insertmode = NOT_SET_VALUES; 3020 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3021 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3022 3023 /* build local table of row and column ownerships */ 3024 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3025 3026 
/* build cache for off array entries formed */ 3027 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3028 3029 b->donotstash = PETSC_FALSE; 3030 b->colmap = NULL; 3031 b->garray = NULL; 3032 b->roworiented = PETSC_TRUE; 3033 3034 /* stuff used in block assembly */ 3035 b->barray = 0; 3036 3037 /* stuff used for matrix vector multiply */ 3038 b->lvec = 0; 3039 b->Mvctx = 0; 3040 3041 /* stuff for MatGetRow() */ 3042 b->rowindices = 0; 3043 b->rowvalues = 0; 3044 b->getrowactive = PETSC_FALSE; 3045 3046 /* hash table stuff */ 3047 b->ht = 0; 3048 b->hd = 0; 3049 b->ht_size = 0; 3050 b->ht_flag = PETSC_FALSE; 3051 b->ht_fact = 0; 3052 b->ht_total_ct = 0; 3053 b->ht_insert_ct = 0; 3054 3055 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 3056 b->ijonly = PETSC_FALSE; 3057 3058
/* Register type-specific methods so generic MatConvert()/MatXXXSetPreallocation()
   dispatch can find them by name. */
3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3062 #if defined(PETSC_HAVE_HYPRE) 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3064 #endif 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3069 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3073 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3074 3075
/* -mat_use_hash_table is read twice deliberately: first as a flag that enables the
   hash table, then as an optional real value giving the table size factor. */
ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3076 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 3077 if (flg) { 3078 PetscReal fact = 1.39; 3079 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3080 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3081 if (fact <= 1.0) fact = 1.39; 3082 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3083 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3084 } 3085 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 /*MC 3090 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3091 3092 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3093 and MATMPIBAIJ otherwise. 3094 3095 Options Database Keys: 3096 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3097 3098 Level: beginner 3099 3100 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3101 M*/ 3102 3103 /*@C 3104 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3105 (block compressed row). For good matrix assembly performance 3106 the user should preallocate the matrix storage by setting the parameters 3107 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3108 performance can be increased by more than a factor of 50. 3109 3110 Collective on Mat 3111 3112 Input Parameters: 3113 + B - the matrix 3114 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3115 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3116 . d_nz - number of block nonzeros per block row in diagonal portion of local 3117 submatrix (same for all local rows) 3118 . d_nnz - array containing the number of block nonzeros in the various block rows 3119 of the in diagonal portion of the local (possibly different for each block 3120 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3121 set it even if it is zero. 3122 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3123 submatrix (same for all local rows). 3124 - o_nnz - array containing the number of nonzeros in the various block rows of the 3125 off-diagonal portion of the local submatrix (possibly different for 3126 each block row) or NULL. 
3127 3128 If the *_nnz parameter is given then the *_nz parameter is ignored 3129 3130 Options Database Keys: 3131 + -mat_block_size - size of the blocks to use 3132 - -mat_use_hash_table <fact> 3133 3134 Notes: 3135 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3136 then it must be used on all processors that share the object for that argument. 3137 3138 Storage Information: 3139 For a square global matrix we define each processor's diagonal portion 3140 to be its local rows and the corresponding columns (a square submatrix); 3141 each processor's off-diagonal portion encompasses the remainder of the 3142 local matrix (a rectangular submatrix). 3143 3144 The user can specify preallocated storage for the diagonal part of 3145 the local submatrix with either d_nz or d_nnz (not both). Set 3146 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3147 memory allocation. Likewise, specify preallocated storage for the 3148 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3149 3150 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3151 the figure below we depict these three local rows and all columns (0-11). 3152 3153 .vb 3154 0 1 2 3 4 5 6 7 8 9 10 11 3155 -------------------------- 3156 row 3 |o o o d d d o o o o o o 3157 row 4 |o o o d d d o o o o o o 3158 row 5 |o o o d d d o o o o o o 3159 -------------------------- 3160 .ve 3161 3162 Thus, any entries in the d locations are stored in the d (diagonal) 3163 submatrix, and any entries in the o locations are stored in the 3164 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3165 stored simply in the MATSEQBAIJ format for compressed row storage. 3166 3167 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3168 and o_nz should indicate the number of block nonzeros per row in the o matrix. 
3169 In general, for PDE problems in which most nonzeros are near the diagonal, 3170 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3171 or you will get TERRIBLE performance; see the users' manual chapter on 3172 matrices. 3173 3174 You can call MatGetInfo() to get information on how effective the preallocation was; 3175 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3176 You can also run with the option -info and look for messages with the string 3177 malloc in them to see if additional memory allocation was needed. 3178 3179 Level: intermediate 3180 3181 .keywords: matrix, block, aij, compressed row, sparse, parallel 3182 3183 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3184 @*/
/* Public entry point: dispatches to the type-specific implementation registered as
   "MatMPIBAIJSetPreallocation_C". */
3185 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3186 { 3187 PetscErrorCode ierr; 3188 3189 PetscFunctionBegin; 3190 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3191 PetscValidType(B,1); 3192 PetscValidLogicalCollectiveInt(B,bs,2); 3193 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3194 PetscFunctionReturn(0); 3195 } 3196 3197 /*@C 3198 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3199 (block compressed row). For good matrix assembly performance 3200 the user should preallocate the matrix storage by setting the parameters 3201 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3202 performance can be increased by more than a factor of 50. 3203 3204 Collective on MPI_Comm 3205 3206 Input Parameters: 3207 + comm - MPI communicator 3208 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3209 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3210 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3211 This value should be the same as the local size used in creating the 3212 y vector for the matrix-vector product y = Ax. 3213 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3214 This value should be the same as the local size used in creating the 3215 x vector for the matrix-vector product y = Ax. 3216 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3217 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3218 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3219 submatrix (same for all local rows) 3220 . d_nnz - array containing the number of nonzero blocks in the various block rows 3221 of the in diagonal portion of the local (possibly different for each block 3222 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3223 and set it even if it is zero. 3224 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3225 submatrix (same for all local rows). 3226 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3227 off-diagonal portion of the local submatrix (possibly different for 3228 each block row) or NULL. 3229 3230 Output Parameter: 3231 . A - the matrix 3232 3233 Options Database Keys: 3234 + -mat_block_size - size of the blocks to use 3235 - -mat_use_hash_table <fact> 3236 3237 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3238 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
3239 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3240 3241 Notes: 3242 If the *_nnz parameter is given then the *_nz parameter is ignored 3243 3244 A nonzero block is any block that has 1 or more nonzeros in it 3245 3246 The user MUST specify either the local or global matrix dimensions 3247 (possibly both). 3248 3249 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3250 then it must be used on all processors that share the object for that argument. 3251 3252 Storage Information: 3253 For a square global matrix we define each processor's diagonal portion 3254 to be its local rows and the corresponding columns (a square submatrix); 3255 each processor's off-diagonal portion encompasses the remainder of the 3256 local matrix (a rectangular submatrix). 3257 3258 The user can specify preallocated storage for the diagonal part of 3259 the local submatrix with either d_nz or d_nnz (not both). Set 3260 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3261 memory allocation. Likewise, specify preallocated storage for the 3262 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3263 3264 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3265 the figure below we depict these three local rows and all columns (0-11). 3266 3267 .vb 3268 0 1 2 3 4 5 6 7 8 9 10 11 3269 -------------------------- 3270 row 3 |o o o d d d o o o o o o 3271 row 4 |o o o d d d o o o o o o 3272 row 5 |o o o d d d o o o o o o 3273 -------------------------- 3274 .ve 3275 3276 Thus, any entries in the d locations are stored in the d (diagonal) 3277 submatrix, and any entries in the o locations are stored in the 3278 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3279 stored simply in the MATSEQBAIJ format for compressed row storage. 
3280 3281 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3282 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3283 In general, for PDE problems in which most nonzeros are near the diagonal, 3284 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3285 or you will get TERRIBLE performance; see the users' manual chapter on 3286 matrices. 3287 3288 Level: intermediate 3289 3290 .keywords: matrix, block, aij, compressed row, sparse, parallel 3291 3292 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3293 @*/
/* Convenience constructor: picks MATSEQBAIJ on one rank and MATMPIBAIJ otherwise, then
   forwards the preallocation parameters. */
3294 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3295 { 3296 PetscErrorCode ierr; 3297 PetscMPIInt size; 3298 3299 PetscFunctionBegin; 3300 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3301 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3302 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3303 if (size > 1) { 3304 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3305 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3306 } else { 3307 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3308 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3309 } 3310 PetscFunctionReturn(0); 3311 } 3312
/* MatDuplicate_MPIBAIJ - deep-copies matin: creates a new matrix of the same type and
   layout, copies the scalar bookkeeping, colmap/garray/rangebs tables, and duplicates
   the sequential A and B blocks (values copied or not per cpvalues). */
3313 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3314 { 3315 Mat mat; 3316 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3317 PetscErrorCode ierr; 3318 PetscInt len=0; 3319 3320 PetscFunctionBegin; 3321 *newmat = 0; 3322 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3323 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3324 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3325 3326 
mat->factortype = matin->factortype; 3327 mat->preallocated = PETSC_TRUE; 3328 mat->assembled = PETSC_TRUE; 3329 mat->insertmode = NOT_SET_VALUES; 3330 3331 a = (Mat_MPIBAIJ*)mat->data; 3332 mat->rmap->bs = matin->rmap->bs; 3333 a->bs2 = oldmat->bs2; 3334 a->mbs = oldmat->mbs; 3335 a->nbs = oldmat->nbs; 3336 a->Mbs = oldmat->Mbs; 3337 a->Nbs = oldmat->Nbs; 3338 3339 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3340 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3341 3342 a->size = oldmat->size; 3343 a->rank = oldmat->rank; 3344 a->donotstash = oldmat->donotstash; 3345 a->roworiented = oldmat->roworiented; 3346 a->rowindices = 0; 3347 a->rowvalues = 0; 3348 a->getrowactive = PETSC_FALSE; 3349 a->barray = 0; 3350 a->rstartbs = oldmat->rstartbs; 3351 a->rendbs = oldmat->rendbs; 3352 a->cstartbs = oldmat->cstartbs; 3353 a->cendbs = oldmat->cendbs; 3354 3355 /* hash table stuff */ 3356 a->ht = 0; 3357 a->hd = 0; 3358 a->ht_size = 0; 3359 a->ht_flag = oldmat->ht_flag; 3360 a->ht_fact = oldmat->ht_fact; 3361 a->ht_total_ct = 0; 3362 a->ht_insert_ct = 0; 3363 3364 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3365 if (oldmat->colmap) { 3366 #if defined(PETSC_USE_CTABLE) 3367 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3368 #else 3369 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3370 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3371 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3372 #endif 3373 } else a->colmap = 0; 3374 3375 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3376 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3377 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3378 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3379 } else a->garray = 0; 3380 3381 ierr = 
MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3382 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3383 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3384 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3385 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3386 3387 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3388 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3389 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3390 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3391 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3392 *newmat = mat; 3393 PetscFunctionReturn(0); 3394 } 3395
/* MatLoad_MPIBAIJ - loads a matrix from a PETSc binary viewer: rank 0 reads the header
   and row lengths, distributes per-rank row lengths and nonzeros; the global row count
   is padded up to a multiple of the block size with singleton rows when necessary. */
3396 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3397 { 3398 PetscErrorCode ierr; 3399 int fd; 3400 PetscInt i,nz,j,rstart,rend; 3401 PetscScalar *vals,*buf; 3402 MPI_Comm comm; 3403 MPI_Status status; 3404 PetscMPIInt rank,size,maxnz; 3405 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3406 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3407 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3408 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3409 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3410 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3411 PetscBool isbinary; 3412 3413 PetscFunctionBegin; 3414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3415 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3416 3417 /* force binary viewer to load .info file if it has not yet done so */ 3418 
ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3419 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3420 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3421 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3422 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3423 if (bs < 0) bs = 1; 3424 3425 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3426 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3427 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3428 if (!rank) { 3429 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3430 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3431 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3432 } 3433 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3434 M = header[1]; N = header[2]; 3435 3436 /* If global sizes are set, check if they are consistent with that given in the file */ 3437 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 3438 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 3439 3440 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3441 3442 /* 3443 This code adds extra rows to make sure the number of rows is 3444 divisible by the blocksize 3445 */ 3446 Mbs = M/bs; 3447 extra_rows = bs - M + bs*Mbs; 3448 if (extra_rows == bs) extra_rows = 0; 3449 else Mbs++; 3450 if (extra_rows && !rank) { 3451 ierr = PetscInfo(viewer,"Padding 
loaded matrix to match blocksize\n");CHKERRQ(ierr); 3452 } 3453 3454 /* determine ownership of all rows */ 3455 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3456 mbs = Mbs/size + ((Mbs % size) > rank); 3457 m = mbs*bs; 3458 } else { /* User set */ 3459 m = newmat->rmap->n; 3460 mbs = m/bs; 3461 } 3462 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3463 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3464 3465 /* process 0 needs enough room for process with most rows */ 3466 if (!rank) { 3467 mmax = rowners[1]; 3468 for (i=2; i<=size; i++) { 3469 mmax = PetscMax(mmax,rowners[i]); 3470 } 3471 mmax*=bs; 3472 } else mmax = -1; /* unused, but compiler warns anyway */ 3473 3474 rowners[0] = 0; 3475 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3476 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3477 rstart = rowners[rank]; 3478 rend = rowners[rank+1]; 3479 3480 /* distribute row lengths to all processors */ 3481 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3482 if (!rank) { 3483 mend = m; 3484 if (size == 1) mend = mend - extra_rows; 3485 ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr); 3486 for (j=mend; j<m; j++) locrowlens[j] = 1; 3487 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3488 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3489 for (j=0; j<m; j++) { 3490 procsnz[0] += locrowlens[j]; 3491 } 3492 for (i=1; i<size; i++) { 3493 mend = browners[i+1] - browners[i]; 3494 if (i == size-1) mend = mend - extra_rows; 3495 ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr); 3496 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3497 /* calculate the number of nonzeros on each processor */ 3498 for (j=0; j<browners[i+1]-browners[i]; j++) { 3499 procsnz[i] += rowlengths[j]; 3500 } 3501 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3502 } 3503 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3504 
} else { 3505 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3506 } 3507 3508 if (!rank) { 3509 /* determine max buffer needed and allocate it */ 3510 maxnz = procsnz[0]; 3511 for (i=1; i<size; i++) { 3512 maxnz = PetscMax(maxnz,procsnz[i]); 3513 } 3514 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3515 3516 /* read in my part of the matrix column indices */ 3517 nz = procsnz[0]; 3518 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3519 mycols = ibuf; 3520 if (size == 1) nz -= extra_rows; 3521 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3522 if (size == 1) { 3523 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3524 } 3525 3526 /* read in every ones (except the last) and ship off */ 3527 for (i=1; i<size-1; i++) { 3528 nz = procsnz[i]; 3529 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3530 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3531 } 3532 /* read in the stuff for the last proc */ 3533 if (size != 1) { 3534 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3535 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3536 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3537 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3538 } 3539 ierr = PetscFree(cols);CHKERRQ(ierr); 3540 } else { 3541 /* determine buffer space needed for message */ 3542 nz = 0; 3543 for (i=0; i<m; i++) { 3544 nz += locrowlens[i]; 3545 } 3546 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3547 mycols = ibuf; 3548 /* receive message of column indices*/ 3549 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3550 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3551 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3552 } 3553 3554 /* loop over local rows, determining number of off diagonal entries */ 3555 ierr = 
PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3556 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3557 rowcount = 0; nzcount = 0; 3558 for (i=0; i<mbs; i++) { 3559 dcount = 0; 3560 odcount = 0; 3561 for (j=0; j<bs; j++) { 3562 kmax = locrowlens[rowcount]; 3563 for (k=0; k<kmax; k++) { 3564 tmp = mycols[nzcount++]/bs; 3565 if (!mask[tmp]) { 3566 mask[tmp] = 1; 3567 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3568 else masked1[dcount++] = tmp; 3569 } 3570 } 3571 rowcount++; 3572 } 3573 3574 dlens[i] = dcount; 3575 odlens[i] = odcount; 3576 3577 /* zero out the mask elements we set */ 3578 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3579 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3580 } 3581 3582 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3583 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3584 3585 if (!rank) { 3586 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3587 /* read in my part of the matrix numerical values */ 3588 nz = procsnz[0]; 3589 vals = buf; 3590 mycols = ibuf; 3591 if (size == 1) nz -= extra_rows; 3592 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3593 if (size == 1) { 3594 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3595 } 3596 3597 /* insert into matrix */ 3598 jj = rstart*bs; 3599 for (i=0; i<m; i++) { 3600 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3601 mycols += locrowlens[i]; 3602 vals += locrowlens[i]; 3603 jj++; 3604 } 3605 /* read in other processors (except the last one) and ship out */ 3606 for (i=1; i<size-1; i++) { 3607 nz = procsnz[i]; 3608 vals = buf; 3609 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3610 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3611 } 3612 /* the last proc */ 3613 if (size != 1) { 3614 nz = procsnz[i] - extra_rows; 3615 vals = buf; 3616 
ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3617 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3618 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3619 } 3620 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3621 } else { 3622 /* receive numeric values */ 3623 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3624 3625 /* receive message of values*/ 3626 vals = buf; 3627 mycols = ibuf; 3628 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3629 3630 /* insert into matrix */ 3631 jj = rstart*bs; 3632 for (i=0; i<m; i++) { 3633 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3634 mycols += locrowlens[i]; 3635 vals += locrowlens[i]; 3636 jj++; 3637 } 3638 } 3639 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3640 ierr = PetscFree(buf);CHKERRQ(ierr); 3641 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3642 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3643 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3644 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3645 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3646 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 PetscFunctionReturn(0); 3648 } 3649 3650 /*@ 3651 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3652 3653 Input Parameters: 3654 . mat - the matrix 3655 . 
fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatSetHashTableFactor_MPIBAIJ() when the method is registered; no-op otherwise */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Type-specific implementation: simply records the factor in the MPIBAIJ data structure */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

/*
   MatMPIBAIJGetSeqBAIJ - Returns the diagonal (Ad) and off-diagonal (Ao) sequential
   blocks of an MPIBAIJ matrix plus the global block-column indices (colmap) of Ao.
   Any output pointer may be NULL if that piece is not wanted.
*/
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes:
  This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the bs x bs scratch block used when values must be repacked */
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows (or columns) of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {   /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;   /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend) {   /* block column in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {   /* off-diagonal block column */
          if (mat->was_assembled) {
            /* need the global-to-local column map to address the off-diagonal part */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;   /* colmap stores 1-based scalar offsets; convert to block index */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in the current off-diagonal pattern: must disassemble and retry */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {   /* off-process block row: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
       CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
    thus you CANNOT change the matrix entries by changing the values of a[] after you have
    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3878 3879 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3880 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3881 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3882 with column-major ordering within blocks. 3883 3884 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3885 3886 .keywords: matrix, aij, compressed row, sparse, parallel 3887 3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3889 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3890 @*/ 3891 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3892 { 3893 PetscErrorCode ierr; 3894 3895 PetscFunctionBegin; 3896 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3897 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3898 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3899 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3900 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3901 ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 3902 ierr = MatSetUp(*mat);CHKERRQ(ierr); 3903 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3904 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3905 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3910 { 3911 PetscErrorCode ierr; 3912 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3913 PetscInt *indx; 3914 
PetscScalar *values; 3915 3916 PetscFunctionBegin; 3917 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3918 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3920 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3921 PetscInt *bindx,rmax=a->rmax,j; 3922 PetscMPIInt rank,size; 3923 3924 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3925 mbs = m/bs; Nbs = N/cbs; 3926 if (n == PETSC_DECIDE) { 3927 nbs = n; 3928 ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr); 3929 n = nbs*cbs; 3930 } else { 3931 nbs = n/cbs; 3932 } 3933 3934 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3935 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3936 3937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3938 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 3939 if (rank == size-1) { 3940 /* Check sum(nbs) = Nbs */ 3941 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3942 } 3943 3944 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3945 for (i=0; i<mbs; i++) { 3946 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3947 nnz = nnz/bs; 3948 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3949 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3950 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3951 } 3952 ierr = PetscFree(bindx);CHKERRQ(ierr); 3953 3954 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3955 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3956 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3957 ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 3958 ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3959 ierr = 
MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3960 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3961 } 3962 3963 /* numeric phase */ 3964 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3965 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3966 3967 for (i=0; i<m; i++) { 3968 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3969 Ii = i + rstart; 3970 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3971 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3972 } 3973 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3974 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977