#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
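/*
   A minimal read-back sketch (illustrative only, not used by the code) for the
   colmap built below.  Values are stored shifted up by one so that 0 can mean
   "this global block column has no copy in the off-diagonal part B":

       PetscInt gbcol = 3;                      a global block column index
       PetscInt loc   = baij->colmap[gbcol];    0 if absent, i*bs+1 otherwise
       if (loc) {
         loc--;                                 now loc/bs is the local block column in B
       }

   With PETSC_USE_CTABLE the same key/value pairs live in a PetscTable and are
   recovered with PetscTableFind(baij->colmap,gbcol+1,&loc) instead.
*/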
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
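/*
   Illustrative note (not used by the code): both macros above address a single
   scalar inside a bs x bs block stored column-major.  For example, with bs = 3,
   inserting at local (row,col) = (7,5) gives

       brow = 7/3 = 2,  ridx = 7%3 = 1     (block row, row within block)
       bcol = 5/3 = 1,  cidx = 5%3 = 2     (block col, col within block)

   and the scalar lands at offset bs2*_i + bs*cidx + ridx = 9*_i + 7 once the
   block's position _i within the row is known from the bisection/linear search.
*/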
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
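/*
   A minimal caller-side sketch (assuming block size 2 and standard PETSc setup)
   of the path exercised above: locally owned entries go straight into A or B,
   while off-process rows are queued in mat->stash until assembly.

       Mat         M;
       PetscInt    row = 0, col = 0;
       PetscScalar one = 1.0;
       MatCreate(PETSC_COMM_WORLD,&M);
       MatSetSizes(M,PETSC_DECIDE,PETSC_DECIDE,8,8);
       MatSetType(M,MATMPIBAIJ);
       MatMPIBAIJSetPreallocation(M,2,2,NULL,1,NULL);
       MatSetValues(M,1,&row,1,&col,&one,INSERT_VALUES);   dispatches to this routine
       MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);             flushes the stash
       MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
       MatDestroy(&M);
*/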
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
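/*
   Illustrative arithmetic for the HASH macro defined earlier (not used by the
   code): the macro is multiplicative hashing with the golden-ratio constant,
   mapping a key to floor(size * frac(key*0.6180339887)).  For instance, with
   size = 11 and key = 25:

       25*0.6180339887 = 15.4508...,  frac = 0.4508...,  11*0.4508... = 4.95...

   so HASH(11,25,tmp) yields slot 4; colliding keys fall through to the linear
   probe loops in the routine below.
*/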
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
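/*
   A small command-line sketch (standard PETSc runtime options, shown for
   orientation only) of how the viewer paths above are typically reached from
   an application:

       ./app -mat_view ::ascii_info           prints the block size summary
       ./app -mat_view ::ascii_info_detail    prints per-rank nz counts and the VecScatter
       ./app -mat_view draw -draw_pause -1    gathers the matrix and draws its nonzero pattern
*/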
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
This does require more communication but still not more than 1192 the communication needed for the nonzero values */ 1193 nzmax = nz; /* space a largest processor needs */ 1194 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1196 cnt = 0; 1197 for (i=0; i<a->mbs; i++) { 1198 pcnt = cnt; 1199 for (j=B->i[i]; j<B->i[i+1]; j++) { 1200 if ((col = garray[B->j[j]]) > cstart) break; 1201 for (l=0; l<bs; l++) { 1202 column_indices[cnt++] = bs*col+l; 1203 } 1204 } 1205 for (k=A->i[i]; k<A->i[i+1]; k++) { 1206 for (l=0; l<bs; l++) { 1207 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1208 } 1209 } 1210 for (; j<B->i[i+1]; j++) { 1211 for (l=0; l<bs; l++) { 1212 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1213 } 1214 } 1215 len = cnt - pcnt; 1216 for (k=1; k<bs; k++) { 1217 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1218 cnt += len; 1219 } 1220 } 1221 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1222 1223 /* store the columns to the file */ 1224 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1225 if (!rank) { 1226 MPI_Status status; 1227 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1228 for (i=1; i<size; i++) { 1229 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1230 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1231 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1233 } 1234 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1235 } else { 1236 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1237 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1240 } 1241 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1242 1243 /* load up the numerical values */ 1244 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1245 cnt = 0; 1246 for (i=0; i<a->mbs; i++) { 1247 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1248 for (j=B->i[i]; j<B->i[i+1]; j++) { 1249 if (garray[B->j[j]] > cstart) break; 1250 for (l=0; l<bs; l++) { 1251 for (ll=0; ll<bs; ll++) { 1252 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1253 } 1254 } 1255 cnt += bs; 1256 } 1257 for (k=A->i[i]; k<A->i[i+1]; k++) { 1258 for (l=0; l<bs; l++) { 1259 for (ll=0; ll<bs; ll++) { 1260 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1261 } 1262 } 1263 cnt += bs; 1264 } 1265 for (; j<B->i[i+1]; j++) { 1266 for (l=0; l<bs; l++) { 1267 for (ll=0; ll<bs; ll++) { 1268 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1269 } 1270 } 1271 cnt += bs; 1272 } 1273 cnt += (bs-1)*rlen; 1274 } 1275 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1276 1277 /* store the column values to the file */ 1278 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1279 if (!rank) { 1280 MPI_Status status; 1281 ierr = 
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
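/*
   Illustrative summary (not used by the code) of the communication pattern shared
   by the Mult/MultTranspose routines above.  With A_d the on-process diagonal
   block and A_o the off-diagonal block B, a forward product overlaps communication
   with local work:

       y = A x  =  A_d x_local + A_o x_ghost

       VecScatterBegin(...);        start gathering x_ghost into a->lvec
       y = A_d x_local;             local multiply while messages are in flight
       VecScatterEnd(...);          finish the gather
       y += A_o x_ghost;            add the off-diagonal contribution

   The transpose product runs the same scatter in SCATTER_REVERSE with ADD_VALUES,
   pushing the contributions of B^T x back to their owning ranks.
*/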
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
info->mallocs = isend[4]; 1594 } else if (flag == MAT_GLOBAL_MAX) { 1595 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1596 1597 info->nz_used = irecv[0]; 1598 info->nz_allocated = irecv[1]; 1599 info->nz_unneeded = irecv[2]; 1600 info->memory = irecv[3]; 1601 info->mallocs = irecv[4]; 1602 } else if (flag == MAT_GLOBAL_SUM) { 1603 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1604 1605 info->nz_used = irecv[0]; 1606 info->nz_allocated = irecv[1]; 1607 info->nz_unneeded = irecv[2]; 1608 info->memory = irecv[3]; 1609 info->mallocs = irecv[4]; 1610 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1611 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1612 info->fill_ratio_needed = 0; 1613 info->factor_mallocs = 0; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1618 { 1619 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 switch (op) { 1624 case MAT_NEW_NONZERO_LOCATIONS: 1625 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1626 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1627 case MAT_KEEP_NONZERO_PATTERN: 1628 case MAT_NEW_NONZERO_LOCATION_ERR: 1629 MatCheckPreallocated(A,1); 1630 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1631 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1632 break; 1633 case MAT_ROW_ORIENTED: 1634 MatCheckPreallocated(A,1); 1635 a->roworiented = flg; 1636 1637 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1638 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1639 break; 1640 case MAT_NEW_DIAGONALS: 1641 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1642 break; 1643 case MAT_IGNORE_OFF_PROC_ENTRIES: 1644 a->donotstash = flg; 1645 break; 1646 case MAT_USE_HASH_TABLE: 1647 a->ht_flag = flg; 1648 a->ht_fact = 1.39; 1649 break; 1650 case MAT_SYMMETRIC: 1651 case MAT_STRUCTURALLY_SYMMETRIC: 1652 case MAT_HERMITIAN: 1653 case MAT_SUBMAT_SINGLEIS: 1654 case MAT_SYMMETRY_ETERNAL: 1655 MatCheckPreallocated(A,1); 1656 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1657 break; 1658 default: 1659 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1665 { 1666 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1667 Mat_SeqBAIJ *Aloc; 1668 Mat B; 1669 PetscErrorCode ierr; 1670 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1671 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1672 MatScalar *a; 1673 1674 PetscFunctionBegin; 1675 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1676 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1677 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1678 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1679 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1680 /* Do not know preallocation information, but must set block size */ 1681 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1682 } else { 1683 B = *matout; 1684 } 1685 1686 /* copy over the A part */ 1687 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1688 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1689 ierr = 
PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1690 1691 for (i=0; i<mbs; i++) { 1692 rvals[0] = bs*(baij->rstartbs + i); 1693 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1694 for (j=ai[i]; j<ai[i+1]; j++) { 1695 col = (baij->cstartbs+aj[j])*bs; 1696 for (k=0; k<bs; k++) { 1697 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1698 1699 col++; a += bs; 1700 } 1701 } 1702 } 1703 /* copy over the B part */ 1704 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1705 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1706 for (i=0; i<mbs; i++) { 1707 rvals[0] = bs*(baij->rstartbs + i); 1708 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1709 for (j=ai[i]; j<ai[i+1]; j++) { 1710 col = baij->garray[aj[j]]*bs; 1711 for (k=0; k<bs; k++) { 1712 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1713 col++; 1714 a += bs; 1715 } 1716 } 1717 } 1718 ierr = PetscFree(rvals);CHKERRQ(ierr); 1719 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 1722 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1723 else { 1724 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1730 { 1731 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1732 Mat a = baij->A,b = baij->B; 1733 PetscErrorCode ierr; 1734 PetscInt s1,s2,s3; 1735 1736 PetscFunctionBegin; 1737 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1738 if (rr) { 1739 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1740 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1741 /* Overlap communication with computation. */ 1742 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1743 } 1744 if (ll) { 1745 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1746 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1747 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1748 } 1749 /* scale the diagonal block */ 1750 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1751 1752 if (rr) { 1753 /* Do a scatter end and then right scale the off-diagonal block */ 1754 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1755 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1761 { 1762 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1763 PetscInt *lrows; 1764 PetscInt r, len; 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 /* get locally owned rows */ 1769 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1770 /* fix right hand side if needed */ 1771 if (x && b) { 1772 const PetscScalar *xx; 1773 PetscScalar *bb; 1774 1775 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1776 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1777 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1778 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1779 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1780 } 1781 1782 /* actually zap the local rows */ 1783 /* 1784 Zero the required rows. 
If the "diagonal block" of the matrix 1785 is square and the user wishes to set the diagonal, we use separate 1786 code so that MatSetValues() is not called for each diagonal entry; that 1787 would allocate new memory for each entry, triggering many mallocs and slowing things down. 1788 1789 */ 1790 /* must zero l->B before l->A because the (diag) case below may put values into l->B */ 1791 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1792 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 1793 PetscBool cong; 1794 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 1795 if (cong) A->congruentlayouts = 1; 1796 else A->congruentlayouts = 0; 1797 } 1798 if ((diag != 0.0) && A->congruentlayouts) { 1799 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1800 } else if (diag != 0.0) { 1801 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1802 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1803 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1804 for (r = 0; r < len; ++r) { 1805 const PetscInt row = lrows[r] + A->rmap->rstart; 1806 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1807 } 1808 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1809 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 } else { 1811 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1812 } 1813 ierr = PetscFree(lrows);CHKERRQ(ierr); 1814 1815 /* only change matrix nonzero state if pattern was allowed to be changed */ 1816 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1817 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1818 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1819 } 1820 PetscFunctionReturn(0); 1821 } 1822 1823 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1824 { 1825 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1826 PetscErrorCode ierr; 1827 PetscMPIInt n = A->rmap->n; 1828 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1829 PetscInt *lrows,*owners = A->rmap->range; 1830 PetscSFNode *rrows; 1831 PetscSF sf; 1832 const PetscScalar *xx; 1833 PetscScalar *bb,*mask; 1834 Vec xmask,lmask; 1835 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1836 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1837 PetscScalar *aa; 1838 1839 PetscFunctionBegin; 1840 /* Create SF where leaves are input rows and roots are owned rows */ 1841 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1842 for (r = 0; r < n; ++r) lrows[r] = -1; 1843 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1844 for (r = 0; r < N; ++r) { 1845 const PetscInt idx = rows[r]; 1846 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1847 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1848 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1849 } 1850 rrows[r].rank = p; 1851 rrows[r].index = rows[r] - owners[p]; 1852 } 1853 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1854 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1855 /* Collect flags for rows to be zeroed */ 1856 ierr = PetscSFReduceBegin(sf,
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1857 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1858 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1859 /* Compress and put in row numbers */ 1860 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1861 /* zero diagonal part of matrix */ 1862 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1863 /* handle off diagonal part of matrix */ 1864 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1865 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1866 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1867 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1868 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1869 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1871 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1872 if (x) { 1873 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1875 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1876 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1877 } 1878 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1879 /* remove zeroed rows of off diagonal matrix */ 1880 for (i = 0; i < len; ++i) { 1881 row = lrows[i]; 1882 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1883 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1884 for (k = 0; k < count; ++k) { 1885 aa[0] = 0.0; 1886 aa += bs; 1887 } 1888 } 1889 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1890 for (i = 0; i < l->B->rmap->N; ++i) { 1891 row = i/bs; 1892 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1893 for (k = 0; k < bs; ++k) { 1894 col = bs*baij->j[j] + k; 1895 if (PetscAbsScalar(mask[col])) { 1896 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1897 if (x) bb[i] -= aa[0]*xx[col]; 1898 aa[0] = 0.0; 1899 } 1900 } 1901 } 1902 } 1903 if (x) { 1904 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1905 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1906 } 1907 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1908 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1909 ierr = PetscFree(lrows);CHKERRQ(ierr); 1910 1911 /* only change matrix nonzero state if pattern was allowed to be changed */ 1912 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1913 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1914 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1915 } 1916 PetscFunctionReturn(0); 1917 } 1918 1919 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1920 { 1921 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1930 1931 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1932 { 1933 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1934 Mat a,b,c,d; 1935 PetscBool flg; 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 a = matA->A; b = matA->B; 1940 c = matB->A; d = matB->B; 1941 1942 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1943 if (flg) { 1944 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1945 } 1946 ierr = 
MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1951 { 1952 PetscErrorCode ierr; 1953 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1954 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1955 1956 PetscFunctionBegin; 1957 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1958 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1959 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1960 } else { 1961 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1962 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1963 } 1964 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1969 { 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1978 { 1979 PetscErrorCode ierr; 1980 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1981 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1982 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1983 1984 PetscFunctionBegin; 1985 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1990 { 1991 PetscErrorCode ierr; 1992 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1993 PetscBLASInt bnz,one=1; 1994 Mat_SeqBAIJ *x,*y; 1995 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1996 1997 PetscFunctionBegin; 1998 if (str == SAME_NONZERO_PATTERN) { 1999 PetscScalar alpha = a; 2000 x = (Mat_SeqBAIJ*)xx->A->data; 2001 y = (Mat_SeqBAIJ*)yy->A->data; 2002 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2003 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2004 x = (Mat_SeqBAIJ*)xx->B->data; 2005 y = (Mat_SeqBAIJ*)yy->B->data; 2006 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2007 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2008 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2009 } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */ 2010 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2011 } else { 2012 Mat B; 2013 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2014 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2015 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2016 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2017 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2018 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2019 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2020 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2021 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2022 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2023 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2024 /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1!
*/ 2025 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2026 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2027 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2028 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2034 { 2035 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2040 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2045 { 2046 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2051 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2052 PetscFunctionReturn(0); 2053 } 2054 2055 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2056 { 2057 PetscErrorCode ierr; 2058 IS iscol_local; 2059 PetscInt csize; 2060 2061 PetscFunctionBegin; 2062 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2063 if (call == MAT_REUSE_MATRIX) { 2064 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2065 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2066 } else { 2067 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2068 } 2069 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2070 if (call == MAT_INITIAL_MATRIX) { 2071 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2072 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 /* 2078 Not great since it makes two copies of the submatrix: first a local SeqBAIJ 2079 matrix on each process, and then the final result formed by concatenating the local matrices. 2080 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 2081 This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency). 2082 */ 2083 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2084 { 2085 PetscErrorCode ierr; 2086 PetscMPIInt rank,size; 2087 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2088 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2089 Mat M,Mreuse; 2090 MatScalar *vwork,*aa; 2091 MPI_Comm comm; 2092 IS isrow_new, iscol_new; 2093 Mat_SeqBAIJ *aij; 2094 2095 PetscFunctionBegin; 2096 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2099 /* The compression and expansion should be avoided.
It does not 2100 report errors and may change the indices, hence it is bug-prone */ 2101 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2102 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2103 2104 if (call == MAT_REUSE_MATRIX) { 2105 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2106 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2107 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 2108 } else { 2109 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 2110 } 2111 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2112 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2113 /* 2114 m - number of local rows 2115 n - number of columns (same on all processors) 2116 rstart - first row in new global matrix generated 2117 */ 2118 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2119 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2120 m = m/bs; 2121 n = n/bs; 2122 2123 if (call == MAT_INITIAL_MATRIX) { 2124 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2125 ii = aij->i; 2126 jj = aij->j; 2127 2128 /* 2129 Determine the number of non-zeros in the diagonal and off-diagonal 2130 portions of the matrix in order to do correct preallocation 2131 */ 2132 2133 /* first get start and end of "diagonal" columns */ 2134 if (csize == PETSC_DECIDE) { 2135 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2136 if (mglobal == n*bs) { /* square matrix */ 2137 nlocal = m; 2138 } else { 2139 nlocal = n/size + ((n % size) > rank); 2140 } 2141 } else { 2142 nlocal = csize/bs; 2143 } 2144 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2145 rstart = rend - nlocal; 2146 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2147 2148 /* next, compute all the lengths */ 2149 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2150 for (i=0; i<m; i++) { 2151 jend = ii[i+1] - ii[i]; 2152 olen = 0; 2153 dlen = 0; 2154 for (j=0; j<jend; j++) { 2155 if (*jj < rstart || *jj >= rend) olen++; 2156 else dlen++; 2157 jj++; 2158 } 2159 olens[i] = olen; 2160 dlens[i] = dlen; 2161 } 2162 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2163 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2164 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2165 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2166 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2167 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2168 } else { 2169 PetscInt ml,nl; 2170 2171 M = *newmat; 2172 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2173 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2174 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2175 /* 2176 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2177 rather than the slower MatSetValues().
2178 */ 2179 M->was_assembled = PETSC_TRUE; 2180 M->assembled = PETSC_FALSE; 2181 } 2182 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2183 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2184 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2185 ii = aij->i; 2186 jj = aij->j; 2187 aa = aij->a; 2188 for (i=0; i<m; i++) { 2189 row = rstart/bs + i; 2190 nz = ii[i+1] - ii[i]; 2191 cwork = jj; jj += nz; 2192 vwork = aa; aa += nz*bs*bs; 2193 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2194 } 2195 2196 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 *newmat = M; 2199 2200 /* save submatrix used in processor for next request */ 2201 if (call == MAT_INITIAL_MATRIX) { 2202 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2203 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2209 { 2210 MPI_Comm comm,pcomm; 2211 PetscInt clocal_size,nrows; 2212 const PetscInt *rows; 2213 PetscMPIInt size; 2214 IS crowp,lcolp; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2219 /* make a collective version of 'rowp' */ 2220 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2221 if (pcomm==comm) { 2222 crowp = rowp; 2223 } else { 2224 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2225 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2226 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2227 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2228 } 2229 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2230 /* make a local version of 'colp' */ 2231 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2232 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2233 if (size==1) { 2234 lcolp = colp; 2235 } else { 2236 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2237 } 2238 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2239 /* now we just get the submatrix */ 2240 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2241 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2242 /* clean up */ 2243 if (pcomm!=comm) { 2244 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2245 } 2246 if (size>1) { 2247 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2253 { 2254 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2255 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2256 2257 PetscFunctionBegin; 2258 if (nghosts) *nghosts = B->nbs; 2259 if (ghosts) *ghosts = baij->garray; 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2264 { 2265 Mat B; 2266 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2267 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2268 Mat_SeqAIJ *b; 2269 PetscErrorCode ierr; 2270 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2271 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2272 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2273 2274 PetscFunctionBegin; 2275 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2276 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2277 2278 /* ---------------------------------------------------------------- 2279 Tell every processor the number of nonzeros per row 2280 */ 2281 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2282 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2283 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2284 } 2285 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2286 displs = recvcounts + size; 2287 for (i=0; i<size; i++) { 2288 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2289 displs[i] = A->rmap->range[i]/bs; 2290 } 2291 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2292 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2293 #else 2294 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2295 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2296 #endif 2297 /* --------------------------------------------------------------- 2298 Create the sequential matrix of the same type as the local block diagonal 2299 */ 2300 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2301 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2302 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2303 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2304 b = (Mat_SeqAIJ*)B->data; 2305 2306 /*-------------------------------------------------------------------- 2307 Copy my part of matrix column indices over 2308 */ 2309 sendcount = ad->nz + bd->nz; 2310 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2311 a_jsendbuf = ad->j; 2312 b_jsendbuf = bd->j; 2313 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2314 cnt = 0; 2315 for (i=0; i<n; i++) { 2316 2317 /* put in lower diagonal portion */ 2318 m = bd->i[i+1] - bd->i[i]; 2319 while (m > 0) { 2320 /* is it above diagonal (in bd (compressed) numbering) */ 2321 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2322 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2323 m--; 2324 } 2325 2326 /* put in diagonal portion */ 2327 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2328 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2329 } 2330 2331 /* put in upper diagonal portion */ 2332 while (m-- > 0) { 2333 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2334 } 2335 } 2336 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2337 2338 /*-------------------------------------------------------------------- 2339 Gather all column indices to all processors 2340 */ 2341 for (i=0; i<size; i++) { 2342 recvcounts[i] = 0; 2343 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2344 recvcounts[i] += lens[j]; 2345 } 2346 } 2347 displs[0] = 0; 2348 for (i=1; i<size; i++) { 2349 displs[i] = displs[i-1] + recvcounts[i-1]; 2350 } 2351 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2352 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2353 #else 2354 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2355 #endif 2356 /*-------------------------------------------------------------------- 2357 Assemble the matrix into useable form (note numerical values not yet set) 2358 */ 
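/* At this point lens[] holds the per-block-row nonzero counts gathered from every process and b->j already holds the gathered column indices; only the row lengths b->ilen and the row pointers b->i of the sequential matrix remain to be filled in below. */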
2359 /* set the b->ilen (length of each row) values */ 2360 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2361 /* set the b->i indices */ 2362 b->i[0] = 0; 2363 for (i=1; i<=A->rmap->N/bs; i++) { 2364 b->i[i] = b->i[i-1] + lens[i-1]; 2365 } 2366 ierr = PetscFree(lens);CHKERRQ(ierr); 2367 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2369 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2370 2371 if (A->symmetric) { 2372 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2373 } else if (A->hermitian) { 2374 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2375 } else if (A->structurally_symmetric) { 2376 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2377 } 2378 *newmat = B; 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2383 { 2384 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2385 PetscErrorCode ierr; 2386 Vec bb1 = 0; 2387 2388 PetscFunctionBegin; 2389 if (flag == SOR_APPLY_UPPER) { 2390 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2395 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2396 } 2397 2398 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2399 if (flag & SOR_ZERO_INITIAL_GUESS) { 2400 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2401 its--; 2402 } 2403 2404 while (its--) { 2405 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2406 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2407 2408 /* update rhs: bb1 = bb - B*x */ 2409 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2410 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2411 2412 /* local sweep */ 2413 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2414 } 2415 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2416 if (flag & SOR_ZERO_INITIAL_GUESS) { 2417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2418 its--; 2419 } 2420 while (its--) { 2421 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2422 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2423 2424 /* update rhs: bb1 = bb - B*x */ 2425 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2426 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2427 2428 /* local sweep */ 2429 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2430 } 2431 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2432 if (flag & SOR_ZERO_INITIAL_GUESS) { 2433 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2434 its--; 2435 } 2436 while (its--) { 2437 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2438 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2439 2440 /* update rhs: bb1 = bb - B*x */ 2441 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2442 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2443 2444 /* local sweep */ 2445 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2446 } 2447 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported"); 2448 2449 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2454 { 2455 PetscErrorCode ierr; 2456 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2457 PetscInt N,i,*garray = aij->garray; 2458 PetscInt ib,jb,bs = A->rmap->bs; 2459 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2460 MatScalar *a_val = a_aij->a; 2461 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2462 MatScalar *b_val = b_aij->a; 2463 PetscReal *work; 2464 2465 PetscFunctionBegin; 2466 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2467 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2468 if (type == NORM_2) { 2469 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2470 for (jb=0; jb<bs; jb++) { 2471 for (ib=0; ib<bs; ib++) { 2472 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2473 a_val++; 2474 } 2475 } 2476 } 2477 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2478 for (jb=0; jb<bs; jb++) { 2479 for (ib=0; ib<bs; ib++) { 2480 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2481 b_val++; 2482 } 2483 } 2484 } 2485 } else if (type == NORM_1) { 2486 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2487 for (jb=0; jb<bs; jb++) { 2488 for (ib=0; ib<bs; ib++) { 2489 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2490 a_val++; 2491 } 2492 } 2493 } 2494 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2495 for (jb=0; jb<bs; jb++) { 2496 for (ib=0; ib<bs; ib++) { 2497 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2498 b_val++; 2499 } 2500 } 2501 } 2502 } else if (type == NORM_INFINITY) { 2503 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2504 for (jb=0; jb<bs; jb++) { 2505 for (ib=0; ib<bs; ib++) { 2506 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2507 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2508 a_val++; 2509 } 2510 } 2511 } 2512 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2513 for (jb=0; jb<bs; jb++) { 2514 for (ib=0; ib<bs; ib++) { 2515 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2516 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2517 b_val++; 2518 } 2519 } 2520 } 2521 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2522 if (type == NORM_INFINITY) { 2523 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2524 } else { 2525 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2526 } 2527 ierr = PetscFree(work);CHKERRQ(ierr); 2528 if (type == NORM_2) { 2529 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2530 } 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2535 { 2536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2541 A->factorerrortype = a->A->factorerrortype; 2542 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2543 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatShift_MPIBAIJ(Mat
Y,PetscScalar a) 2548 { 2549 PetscErrorCode ierr; 2550 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2551 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2552 2553 PetscFunctionBegin; 2554 if (!Y->preallocated) { 2555 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2556 } else if (!aij->nz) { 2557 PetscInt nonew = aij->nonew; 2558 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2559 aij->nonew = nonew; 2560 } 2561 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2566 { 2567 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2568 PetscErrorCode ierr; 2569 2570 PetscFunctionBegin; 2571 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2572 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2573 if (d) { 2574 PetscInt rstart; 2575 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2576 *d += rstart/A->rmap->bs; 2577 2578 } 2579 PetscFunctionReturn(0); 2580 } 2581 2582 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2583 { 2584 PetscFunctionBegin; 2585 *a = ((Mat_MPIBAIJ*)A->data)->A; 2586 PetscFunctionReturn(0); 2587 } 2588 2589 /* -------------------------------------------------------------------*/ 2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2591 MatGetRow_MPIBAIJ, 2592 MatRestoreRow_MPIBAIJ, 2593 MatMult_MPIBAIJ, 2594 /* 4*/ MatMultAdd_MPIBAIJ, 2595 MatMultTranspose_MPIBAIJ, 2596 MatMultTransposeAdd_MPIBAIJ, 2597 0, 2598 0, 2599 0, 2600 /*10*/ 0, 2601 0, 2602 0, 2603 MatSOR_MPIBAIJ, 2604 MatTranspose_MPIBAIJ, 2605 /*15*/ MatGetInfo_MPIBAIJ, 2606 MatEqual_MPIBAIJ, 2607 MatGetDiagonal_MPIBAIJ, 2608 MatDiagonalScale_MPIBAIJ, 2609 MatNorm_MPIBAIJ, 2610 /*20*/ MatAssemblyBegin_MPIBAIJ, 2611 MatAssemblyEnd_MPIBAIJ, 2612 MatSetOption_MPIBAIJ, 2613 MatZeroEntries_MPIBAIJ, 2614 /*24*/ MatZeroRows_MPIBAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*29*/ MatSetUp_MPIBAIJ, 2620 0, 2621 0, 2622 MatGetDiagonalBlock_MPIBAIJ, 2623 0, 2624 /*34*/ MatDuplicate_MPIBAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*39*/ MatAXPY_MPIBAIJ, 2630 MatCreateSubMatrices_MPIBAIJ, 2631 MatIncreaseOverlap_MPIBAIJ, 2632 MatGetValues_MPIBAIJ, 2633 MatCopy_MPIBAIJ, 2634 /*44*/ 0, 2635 MatScale_MPIBAIJ, 2636 MatShift_MPIBAIJ, 2637 0, 2638 MatZeroRowsColumns_MPIBAIJ, 2639 /*49*/ 0, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*54*/ MatFDColoringCreate_MPIXAIJ, 2645 0, 2646 MatSetUnfactored_MPIBAIJ, 2647 MatPermute_MPIBAIJ, 2648 MatSetValuesBlocked_MPIBAIJ, 2649 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2650 MatDestroy_MPIBAIJ, 2651 MatView_MPIBAIJ, 2652 0, 2653 0, 2654 /*64*/ 0, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*74*/ 0, 2665 MatFDColoringApply_BAIJ, 2666 0, 2667 0, 2668 0, 2669 /*79*/ 0, 2670 0, 2671 0, 2672 0, 2673 MatLoad_MPIBAIJ, 2674 /*84*/ 0, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*89*/ 0, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*94*/ 0, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /*99*/ 0, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /*104*/0, 2695 MatRealPart_MPIBAIJ, 2696 MatImaginaryPart_MPIBAIJ, 2697 0, 2698 0, 2699 /*109*/0, 2700 0, 2701 0, 2702 0, 2703 MatMissingDiagonal_MPIBAIJ, 2704 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2705 0, 2706 MatGetGhosts_MPIBAIJ, 2707 0, 2708 0, 2709 /*119*/0, 2710 0, 2711 0, 2712 0, 2713 MatGetMultiProcBlock_MPIBAIJ, 2714 /*124*/0, 2715 MatGetColumnNorms_MPIBAIJ, 2716 
MatInvertBlockDiagonal_MPIBAIJ, 2717 0, 2718 0, 2719 /*129*/ 0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /*134*/ 0, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /*139*/ MatSetBlockSizes_Default, 2730 0, 2731 0, 2732 MatFDColoringSetUp_MPIXAIJ, 2733 0, 2734 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2735 }; 2736 2737 2738 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2739 2740 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2741 { 2742 PetscInt m,rstart,cstart,cend; 2743 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2744 const PetscInt *JJ =0; 2745 PetscScalar *values=0; 2746 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2751 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2752 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2753 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2754 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2755 m = B->rmap->n/bs; 2756 rstart = B->rmap->rstart/bs; 2757 cstart = B->cmap->rstart/bs; 2758 cend = B->cmap->rend/bs; 2759 2760 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2761 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2762 for (i=0; i<m; i++) { 2763 nz = ii[i+1] - ii[i]; 2764 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2765 nz_max = PetscMax(nz_max,nz); 2766 dlen = 0; 2767 olen = 0; 2768 JJ = jj + ii[i]; 2769 for (j=0; j<nz; j++) { 2770 if (*JJ < cstart || *JJ >= cend) olen++; 2771 else dlen++; 2772 JJ++; 2773 } 2774 d_nnz[i] = dlen; 2775 o_nnz[i] = olen; 2776 } 2777 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2778 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2779 2780 values = (PetscScalar*)V; 2781 if (!values) { 2782 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2783 } 2784 for (i=0; i<m; i++) { 2785 PetscInt row = i + rstart; 2786 PetscInt ncols = ii[i+1] - ii[i]; 2787 const PetscInt *icols = jj + ii[i]; 2788 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2789 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2790 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2791 } else { /* block ordering does not match so we can only insert one block at a time. */ 2792 PetscInt j; 2793 for (j=0; j<ncols; j++) { 2794 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2795 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2796 } 2797 } 2798 } 2799 2800 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2801 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2802 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2803 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 /*@C 2808 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2809 (block compressed row). 2810 2811 Collective on MPI_Comm 2812 2813 Input Parameters: 2814 + B - the matrix 2815 . bs - the block size 2816 . i - the indices into j for the start of each local row (starts with zero) 2817 .
j - the column indices for each local row (starts with zero) these must be sorted for each row 2818 - v - optional values in the matrix 2819 2820 Level: developer 2821 2822 Notes: 2823 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2824 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2825 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2826 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2827 block column and the second index is over columns within a block. 2828 2829 .keywords: matrix, aij, compressed row, sparse, parallel 2830 2831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2832 @*/ 2833 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2834 { 2835 PetscErrorCode ierr; 2836 2837 PetscFunctionBegin; 2838 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2839 PetscValidType(B,1); 2840 PetscValidLogicalCollectiveInt(B,bs,2); 2841 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2842 PetscFunctionReturn(0); 2843 } 2844 2845 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2846 { 2847 Mat_MPIBAIJ *b; 2848 PetscErrorCode ierr; 2849 PetscInt i; 2850 2851 PetscFunctionBegin; 2852 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2853 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2854 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2856 2857 if (d_nnz) { 2858 for (i=0; i<B->rmap->n/bs; i++) { 2859 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2860 } 2861 } 2862 if (o_nnz) { 2863 for (i=0; i<B->rmap->n/bs; i++) { 2864 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2865 } 2866 } 2867 2868 b = (Mat_MPIBAIJ*)B->data; 2869 b->bs2 = bs*bs; 2870 b->mbs = B->rmap->n/bs; 2871 b->nbs = B->cmap->n/bs; 2872 b->Mbs = B->rmap->N/bs; 2873 b->Nbs = B->cmap->N/bs; 2874 2875 for (i=0; i<=b->size; i++) { 2876 b->rangebs[i] = B->rmap->range[i]/bs; 2877 } 2878 b->rstartbs = B->rmap->rstart/bs; 2879 b->rendbs = B->rmap->rend/bs; 2880 b->cstartbs = B->cmap->rstart/bs; 2881 b->cendbs = B->cmap->rend/bs; 2882 2883 #if defined(PETSC_USE_CTABLE) 2884 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2885 #else 2886 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2887 #endif 2888 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2889 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2890 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2891 2892 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2893 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2894 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2895 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2896 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2898 2899 if 
(!B->preallocated) { 2900 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2901 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2902 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2904 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2905 } 2906 2907 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2908 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2909 B->preallocated = PETSC_TRUE; 2910 B->was_assembled = PETSC_FALSE; 2911 B->assembled = PETSC_FALSE; 2912 PetscFunctionReturn(0); 2913 } 2914 2915 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2916 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2917 2918 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2919 { 2920 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2921 PetscErrorCode ierr; 2922 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2923 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2924 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2928 ii[0] = 0; 2929 for (i=0; i<M; i++) { 2930 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2931 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2932 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2933 /* remove one from count of matrix has diagonal */ 2934 for (j=id[i]; j<id[i+1]; j++) { 2935 if (jd[j] == i) {ii[i+1]--;break;} 2936 } 2937 } 2938 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2939 cnt = 0; 2940 for (i=0; i<M; i++) { 2941 for (j=io[i]; j<io[i+1]; j++) { 2942 if (garray[jo[j]] > rstart) break; 2943 jj[cnt++] = garray[jo[j]]; 2944 } 2945 for (k=id[i]; k<id[i+1]; k++) { 2946 if (jd[k] != i) { 2947 jj[cnt++] = rstart + jd[k]; 2948 } 2949 } 2950 for (; j<io[i+1]; j++) { 2951 jj[cnt++] = garray[jo[j]]; 2952 } 2953 } 2954 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2959 2960 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2961 2962 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2963 { 2964 PetscErrorCode ierr; 2965 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2966 Mat B; 2967 Mat_MPIAIJ *b; 2968 2969 PetscFunctionBegin; 2970 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2971 2972 if (reuse == MAT_REUSE_MATRIX) { 2973 B = *newmat; 2974 } else { 2975 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2976 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2977 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2978 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2979 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2980 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2981 } 2982 b = (Mat_MPIAIJ*) B->data; 2983 2984 if (reuse == MAT_REUSE_MATRIX) { 2985 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, 
MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2986 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2987 } else { 2988 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2989 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2990 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2991 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2992 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2993 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 } 2996 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 2999 if (reuse == MAT_INPLACE_MATRIX) { 3000 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3001 } else { 3002 *newmat = B; 3003 } 3004 PetscFunctionReturn(0); 3005 } 3006 3007 /*MC 3008 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3009 3010 Options Database Keys: 3011 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3012 . -mat_block_size <bs> - set the blocksize used to store the matrix 3013 - -mat_use_hash_table <fact> 3014 3015 Level: beginner 3016 3017 .seealso: MatCreateMPIBAIJ 3018 M*/ 3019 3020 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3021 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 3022 3023 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3024 { 3025 Mat_MPIBAIJ *b; 3026 PetscErrorCode ierr; 3027 PetscBool flg = PETSC_FALSE; 3028 3029 PetscFunctionBegin; 3030 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3031 B->data = (void*)b; 3032 3033 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3034 B->assembled = PETSC_FALSE; 3035 3036 B->insertmode = NOT_SET_VALUES; 3037 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3038 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3039 3040 /* build local table of row and column ownerships */ 3041 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3042 3043 /* build cache for off array entries formed */ 3044 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3045 3046 b->donotstash = PETSC_FALSE; 3047 b->colmap = NULL; 3048 b->garray = NULL; 3049 b->roworiented = PETSC_TRUE; 3050 3051 /* stuff used in block assembly */ 3052 b->barray = 0; 3053 3054 /* stuff used for matrix vector multiply */ 3055 b->lvec = 0; 3056 b->Mvctx = 0; 3057 3058 /* stuff for MatGetRow() */ 3059 b->rowindices = 0; 3060 b->rowvalues = 0; 3061 b->getrowactive = PETSC_FALSE; 3062 3063 /* hash table stuff */ 3064 b->ht = 0; 3065 b->hd = 0; 3066 b->ht_size = 0; 3067 b->ht_flag = PETSC_FALSE; 3068 b->ht_fact = 0; 3069 b->ht_total_ct = 0; 3070 b->ht_insert_ct = 0; 3071 3072 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 3073 b->ijonly = PETSC_FALSE; 3074 3075 3076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3079 #if defined(PETSC_HAVE_HYPRE) 3080 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3081 #endif 3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3089 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3090 3091 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3092 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 3093 if (flg) { 3094 PetscReal fact = 1.39; 3095 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3096 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3097 if (fact <= 1.0) fact = 1.39; 3098 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3099 ierr = PetscInfo1(B,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr); 3100 } 3101 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3102 PetscFunctionReturn(0); 3103 } 3104 3105 /*MC 3106 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3107 3108 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3109 and MATMPIBAIJ otherwise. 3110 3111 Options Database Keys: 3112 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3113 3114 Level: beginner 3115 3116 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3117 M*/ 3118 3119 /*@C 3120 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3121 (block compressed row). For good matrix assembly performance 3122 the user should preallocate the matrix storage by setting the parameters 3123 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3124 performance can be increased by more than a factor of 50. 3125 3126 Collective on Mat 3127 3128 Input Parameters: 3129 + B - the matrix 3130 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3131 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3132 . d_nz - number of block nonzeros per block row in diagonal portion of local 3133 submatrix (same for all local rows) 3134 . d_nnz - array containing the number of block nonzeros in the various block rows 3135 in the diagonal portion of the local submatrix (possibly different for each block 3136 row) or NULL.
          If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row), or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly, sized using the given factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
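/*
   Usage sketch (not part of the original source; the routine name, block size,
   and nonzero pattern are made up for illustration): exact per-block-row
   preallocation for a block tridiagonal matrix. Interior block rows couple
   only to their two block neighbours, so at most 3 diagonal-part blocks are
   needed; only the first and last local block rows can reach outside the
   local diagonal block, so only they get an off-diagonal-part block. Using 3
   for every d_nnz entry slightly overestimates at the global boundaries,
   which is harmless.

     static PetscErrorCode ExamplePreallocateBlockTridiagonal(MPI_Comm comm,PetscInt mb,Mat *A)
     {
       PetscErrorCode ierr;
       PetscInt       bs = 2,i,*d_nnz,*o_nnz;   // mb = number of local block rows

       ierr = PetscMalloc2(mb,&d_nnz,mb,&o_nnz);CHKERRQ(ierr);
       for (i=0; i<mb; i++) {
         d_nnz[i] = 3;                          // diagonal block plus left and right neighbours
         o_nnz[i] = (i == 0 || i == mb-1) ? 1 : 0;
       }
       ierr = MatCreate(comm,A);CHKERRQ(ierr);
       ierr = MatSetSizes(*A,mb*bs,mb*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
       ierr = MatMPIBAIJSetPreallocation(*A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  // *_nz ignored since *_nnz given
       ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
       return 0;
     }
*/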
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs().
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
          of the diagonal portion of the local submatrix (possibly different for each block
          row), or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row), or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly, sized using the given factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation().]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
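/*
   Usage sketch (not part of the original source; sizes are illustrative only):
   the one-call equivalent of MatCreate() + MatSetType() + preallocation shown
   above. Here bs=2, the global size is 100 points (50 block rows), and at most
   5 diagonal-part and 2 off-diagonal-part blocks per block row are preallocated.

     Mat A;
     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
*/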
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = NULL;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = NULL,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize: e.g. M=10 with bs=3 gives Mbs=4 and
     extra_rows=2, and each padding row gets a 1.0 on its diagonal.
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the parts for all other processes (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the parts for all other processes (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
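/*
   Usage sketch (not part of the original source; the file name is
   hypothetical): loading a binary matrix as MATMPIBAIJ, which dispatches to
   MatLoad_MPIBAIJ() above. The blocksize used while loading comes from a
   prior MatSetBlockSize() or the -matload_block_size option.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/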
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the matrix hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - the factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}
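/*
   Usage sketch (not part of the original source): inspecting the two
   sequential pieces of an assembled MATMPIBAIJ matrix. Ad holds the diagonal
   portion, Ao the off-diagonal portion, and colmap[] maps block columns of Ao
   back to global block columns. The returned matrices are borrowed references
   and must not be destroyed by the caller.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscInt       mAd,nAo;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetSize(Ad,&mAd,NULL);CHKERRQ(ierr);   // local square diagonal block
     ierr = MatGetSize(Ao,NULL,&nAo);CHKERRQ(ierr);   // compressed off-diagonal block
*/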
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This has a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code is duplicated rather than reused.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size; only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax; for square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, etc. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
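/*
   Usage sketch (not part of the original source; the values are a toy
   example): each process passes its own local rows in CSR form, with global
   column indices, and bs = 1 (the only supported block size). Here every
   process contributes two rows whose entries fall in global columns 0 and 1.

     Mat         A;
     PetscInt    i[3] = {0,2,4};              // row offsets into j[] and a[]
     PetscInt    j[4] = {0,1,0,1};            // global column indices
     PetscScalar a[4] = {2.0,-1.0,-1.0,2.0};
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/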
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      nbs  = n;
      ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
      n    = nbs*cbs;
    } else {
      nbs = n/cbs;
    }

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
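/*
   Usage sketch (not part of the original source): this routine is normally
   reached through the type-independent front end MatCreateMPIMatConcatenateSeqMat(),
   which stacks one sequential matrix per process, in rank order, into a
   parallel matrix. seqA below stands for an already assembled MATSEQBAIJ
   matrix on PETSC_COMM_SELF.

     Mat mpiA;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/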