#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
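/*
   A minimal sketch (not library code) of how the colmap built above is consulted.
   Both branches store "local index + 1" so that a stored value of 0 (or a failed
   table lookup) means the global block column is absent from B.  The names gbcol
   and lcol are hypothetical; the real lookups appear in MatSetValues_MPIBAIJ()
   and MatGetValues_MPIBAIJ() below:

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                           .. lcol is now -1 if gbcol is absent
   #else
      lcol = baij->colmap[gbcol] - 1;   .. dense array indexed by global block column
   #endif
*/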
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat         A      = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax = a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
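/*
   Usage sketch (not part of this file): callers always pass global point indices;
   the routine above routes each entry to the diagonal block (baij->A), to the
   off-diagonal block (baij->B, translating the column through colmap), or to the
   stash when the row lives on another process.  The matrix, indices, and value
   below are illustrative only:

      PetscScalar one  = 1.0;
      PetscInt    grow = 3,gcol = 7;
      ierr = MatSetValues(mat,1,&grow,1,&gcol,&one,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/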
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
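/*
   Storage layout note (follows directly from the addressing above, where entry
   (r,c) of block i lives at ap[bs2*i + bs*c + r]): each bs x bs block is held
   column-major.  For example, with bs = 2 (bs2 = 4) the block

      [a b]
      [c d]

   is stored as a,c,b,d, which is why the row-oriented branches walk
   jj = ii, ii+bs, ii+2*bs, ... while consuming the caller's row-major values.
*/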
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
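/*
   HASH() implements multiplicative (Fibonacci) hashing: multiply the key by the
   fractional golden ratio, keep the fractional part, and scale it by the table
   size, i.e. h = floor(size * frac(key * 0.6180339887)).  A worked example
   (illustrative numbers only): key = 100 gives key*HASH_KEY = 61.80339887 with
   fractional part 0.80339887, so with size = 50 the slot is floor(40.17) = 40.
*/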
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
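/*
   Collision handling above is open addressing with linear probing: start at the
   hashed slot h1, scan forward to the end of the table, then wrap around and scan
   from 0 back up to h1.  The table itself is built once, in
   MatCreateHashTable_MPIBAIJ_Private() below, from the assembled nonzero pattern,
   so a key that is not found means a new nonzero location and is an error here.
*/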
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
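/*
   Reduction patterns used above: NORM_FROBENIUS sums |a_ij|^2 over the local A and
   B parts and reduces with MPIU_SUM before the square root, so
   nrm = sqrt(sum_ij |a_ij|^2); NORM_1 accumulates per-column absolute sums into a
   dense work array of length mat->cmap->N (note: not scalable in the number of
   global columns) and reduces with MPIU_SUM before taking the max; NORM_INFINITY
   forms local row sums and reduces with MPIU_MAX.
*/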
/*
   Creates and fills the hash table.
   The table is created only once; if new entries need to be added
   to the matrix then the hash table has to be destroyed and recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
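/*
   Sketch of the assembly protocol (standard PETSc usage, shown for reference):
   entries destined for other processes accumulate in mat->stash and mat->bstash
   during MatSetValues()/MatSetValuesBlocked(); MatAssemblyBegin() above starts
   the scatter of those stashes, and MatAssemblyEnd() below receives them and
   applies them through the same MatSetValues kernels:

      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/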
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
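/*
   Note on the MPI_PROD reduction above: each process contributes its local
   mat->was_assembled flag, so the product (other_disassembled) is true only if
   no process has disassembled its B part; a process that still shows
   was_assembled then disassembles too, so every rank reassembles from the same
   state.  The nonzerostate reduction similarly sums the A and B states so all
   ranks agree on whether the parallel nonzero structure changed.
*/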
extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices.  Include them for all point rows, not just one
     per block row, since process 0 does not have the information needed to expand a
     block row into its point rows.  This requires more communication, but still no
     more than the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
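/*
   Resulting binary file layout (as written above, in PETSc binary matrix format):
   a 4-entry header [MAT_FILE_CLASSID, rows, cols, total nz], then the per-row
   nonzero counts for all point rows, then all global column indices, then all
   numerical values; each block row's data is expanded to bs point rows before
   writing.  The optional info file records -matload_block_size so MatLoad()
   can restore the block size.
*/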
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
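/*
   The sequence above overlaps communication with computation.  In block form,

      y = A_d * x_local + B_o * x_ghost

   where A_d (baij->A) needs only the local entries of x and B_o (baij->B) needs
   only the ghost values gathered into a->lvec.  The local product is computed
   between VecScatterBegin() and VecScatterEnd(), hiding the scatter latency.
*/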
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
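/*
   The transpose products above run the scatter in reverse: y = A_d^T x is purely
   local, while B_o^T x produces contributions belonging to ghost columns, which
   are accumulated into the owning processes' entries of y (or z) through an
   ADD_VALUES reverse scatter of a->lvec.
*/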
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
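/*
   Merge logic in MatGetRow above: A's and B's row entries are each already sorted
   by local column, and the B (off-diagonal) columns map through garray to global
   columns that fall either entirely below or entirely above the diagonal block.
   So the global row is produced by emitting the B entries whose global block
   column is below cstart, then all A entries shifted by cstart*bs, then the
   remaining B entries.
*/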
info->mallocs = isend[4]; 1595 } else if (flag == MAT_GLOBAL_MAX) { 1596 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1597 1598 info->nz_used = irecv[0]; 1599 info->nz_allocated = irecv[1]; 1600 info->nz_unneeded = irecv[2]; 1601 info->memory = irecv[3]; 1602 info->mallocs = irecv[4]; 1603 } else if (flag == MAT_GLOBAL_SUM) { 1604 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1605 1606 info->nz_used = irecv[0]; 1607 info->nz_allocated = irecv[1]; 1608 info->nz_unneeded = irecv[2]; 1609 info->memory = irecv[3]; 1610 info->mallocs = irecv[4]; 1611 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1612 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1613 info->fill_ratio_needed = 0; 1614 info->factor_mallocs = 0; 1615 PetscFunctionReturn(0); 1616 } 1617 1618 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1619 { 1620 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1621 PetscErrorCode ierr; 1622 1623 PetscFunctionBegin; 1624 switch (op) { 1625 case MAT_NEW_NONZERO_LOCATIONS: 1626 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1627 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1628 case MAT_KEEP_NONZERO_PATTERN: 1629 case MAT_NEW_NONZERO_LOCATION_ERR: 1630 MatCheckPreallocated(A,1); 1631 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1632 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1633 break; 1634 case MAT_ROW_ORIENTED: 1635 MatCheckPreallocated(A,1); 1636 a->roworiented = flg; 1637 1638 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1639 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1640 break; 1641 case MAT_NEW_DIAGONALS: 1642 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1643 break; 1644 case MAT_IGNORE_OFF_PROC_ENTRIES: 1645 a->donotstash = flg; 1646 break; 1647 case MAT_USE_HASH_TABLE: 1648 a->ht_flag = flg; 1649 a->ht_fact = 1.39; 1650 break; 1651 case MAT_SYMMETRIC: 1652 case MAT_STRUCTURALLY_SYMMETRIC: 1653 case MAT_HERMITIAN: 1654 case MAT_SUBMAT_SINGLEIS: 1655 case MAT_SYMMETRY_ETERNAL: 1656 MatCheckPreallocated(A,1); 1657 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1658 break; 1659 default: 1660 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1666 { 1667 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1668 Mat_SeqBAIJ *Aloc; 1669 Mat B; 1670 PetscErrorCode ierr; 1671 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1672 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1673 MatScalar *a; 1674 1675 PetscFunctionBegin; 1676 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1677 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1678 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1679 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1680 /* Do not know preallocation information, but must set block size */ 1681 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1682 } else { 1683 B = *matout; 1684 } 1685 1686 /* copy over the A part */ 1687 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1688 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1689 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1690 1691 for (i=0; i<mbs; i++) { 1692 rvals[0] = bs*(baij->rstartbs + i); 1693 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] 
+ 1;
1694     for (j=ai[i]; j<ai[i+1]; j++) {
1695       col = (baij->cstartbs+aj[j])*bs;
1696       for (k=0; k<bs; k++) {
1697         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1698 
1699         col++; a += bs;
1700       }
1701     }
1702   }
1703   /* copy over the B part */
1704   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1705   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1706   for (i=0; i<mbs; i++) {
1707     rvals[0] = bs*(baij->rstartbs + i);
1708     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1709     for (j=ai[i]; j<ai[i+1]; j++) {
1710       col = baij->garray[aj[j]]*bs;
1711       for (k=0; k<bs; k++) {
1712         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1713         col++;
1714         a += bs;
1715       }
1716     }
1717   }
1718   ierr = PetscFree(rvals);CHKERRQ(ierr);
1719   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721 
1722   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1723   else {
1724     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1730 {
1731   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1732   Mat            a = baij->A,b = baij->B;
1733   PetscErrorCode ierr;
1734   PetscInt       s1,s2,s3;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1738   if (rr) {
1739     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1740     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1741     /* Overlap communication with computation. */
1742     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1743   }
1744   if (ll) {
1745     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1746     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1747     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1748   }
1749   /* scale the diagonal block */
1750   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1751 
1752   if (rr) {
1753     /* Do a scatter end and then right scale the off-diagonal block */
1754     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1755     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1761 {
1762   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1763   PetscInt       *lrows;
1764   PetscInt       r, len;
1765   PetscBool      cong;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   /* get locally owned rows */
1770   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1771   /* fix right hand side if needed */
1772   if (x && b) {
1773     const PetscScalar *xx;
1774     PetscScalar       *bb;
1775 
1776     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1777     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1778     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1779     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1780     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1781   }
1782 
1783   /* actually zap the local rows */
1784   /*
1785     Zero the required rows. If the "diagonal block" of the matrix
1786     is square and the user wishes to set the diagonal we use separate
1787     code so that MatSetValues() is not called for each diagonal entry,
1788     which would allocate new memory on every call and slow things down.
1789 1790 */ 1791 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1792 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1793 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1794 if ((diag != 0.0) && cong) { 1795 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1796 } else if (diag != 0.0) { 1797 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1798 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1799 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1800 for (r = 0; r < len; ++r) { 1801 const PetscInt row = lrows[r] + A->rmap->rstart; 1802 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1803 } 1804 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1805 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1808 } 1809 ierr = PetscFree(lrows);CHKERRQ(ierr); 1810 1811 /* only change matrix nonzero state if pattern was allowed to be changed */ 1812 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1813 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1814 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1815 } 1816 PetscFunctionReturn(0); 1817 } 1818 1819 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1820 { 1821 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1822 PetscErrorCode ierr; 1823 PetscMPIInt n = A->rmap->n; 1824 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1825 PetscInt *lrows,*owners = A->rmap->range; 1826 PetscSFNode *rrows; 1827 PetscSF sf; 1828 const PetscScalar *xx; 1829 PetscScalar *bb,*mask; 1830 Vec xmask,lmask; 1831 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1832 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1833 PetscScalar *aa; 1834 1835 PetscFunctionBegin; 1836 /* Create SF where leaves are input rows and roots are owned rows */ 1837 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1838 for (r = 0; r < n; ++r) lrows[r] = -1; 1839 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1840 for (r = 0; r < N; ++r) { 1841 const PetscInt idx = rows[r]; 1842 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1843 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1844 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1845 } 1846 rrows[r].rank = p; 1847 rrows[r].index = rows[r] - owners[p]; 1848 } 1849 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1850 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1851 /* Collect flags for rows to be zeroed */ 1852 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1853 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1854 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1855 /* Compress and put in row numbers */ 1856 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1857 /* zero diagonal part of matrix */ 1858 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1859 /* handle off diagonal part of matrix */ 1860 
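/*
   A sketch of the strategy used below: build a mask vector with a 1 in every locally
   zeroed row and scatter it forward, so each process learns which of its ghost columns
   correspond to zeroed rows; zero the rows listed in lrows within the off-diagonal
   block; then sweep all off-diagonal entries, zeroing the masked columns and, when x
   and b are given, updating b to account for the eliminated entries.
*/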
ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1861 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1862 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1863 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1864 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1865 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1868 if (x) { 1869 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1871 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1872 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1873 } 1874 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1875 /* remove zeroed rows of off diagonal matrix */ 1876 for (i = 0; i < len; ++i) { 1877 row = lrows[i]; 1878 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1879 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1880 for (k = 0; k < count; ++k) { 1881 aa[0] = 0.0; 1882 aa += bs; 1883 } 1884 } 1885 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1886 for (i = 0; i < l->B->rmap->N; ++i) { 1887 row = i/bs; 1888 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1889 for (k = 0; k < bs; ++k) { 1890 col = bs*baij->j[j] + k; 1891 if (PetscAbsScalar(mask[col])) { 1892 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1893 if (x) bb[i] -= aa[0]*xx[col]; 1894 aa[0] = 0.0; 1895 } 1896 } 1897 } 1898 } 1899 if (x) { 1900 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1901 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1902 } 1903 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1904 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1905 ierr = PetscFree(lrows);CHKERRQ(ierr); 1906 1907 /* only change matrix nonzero state if pattern was allowed to be changed */ 1908 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1909 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1910 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1916 { 1917 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1926 1927 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1928 { 1929 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1930 Mat a,b,c,d; 1931 PetscBool flg; 1932 PetscErrorCode ierr; 1933 1934 PetscFunctionBegin; 1935 a = matA->A; b = matA->B; 1936 c = matB->A; d = matB->B; 1937 1938 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1939 if (flg) { 1940 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1941 } 1942 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1947 { 1948 PetscErrorCode ierr; 1949 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1950 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1951 1952 PetscFunctionBegin; 1953 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
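In that case we fall back to MatCopy_Basic(), the generic implementation that copies
values row by row with MatGetRow() and MatSetValues().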
*/ 1954 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1955 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1956 } else { 1957 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1958 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1959 } 1960 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1965 { 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1974 { 1975 PetscErrorCode ierr; 1976 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1977 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1978 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1979 1980 PetscFunctionBegin; 1981 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1982 PetscFunctionReturn(0); 1983 } 1984 1985 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1986 { 1987 PetscErrorCode ierr; 1988 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1989 PetscBLASInt bnz,one=1; 1990 Mat_SeqBAIJ *x,*y; 1991 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1992 1993 PetscFunctionBegin; 1994 if (str == SAME_NONZERO_PATTERN) { 1995 PetscScalar alpha = a; 1996 x = (Mat_SeqBAIJ*)xx->A->data; 1997 y = (Mat_SeqBAIJ*)yy->A->data; 1998 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1999 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2000 x = (Mat_SeqBAIJ*)xx->B->data; 2001 y = (Mat_SeqBAIJ*)yy->B->data; 2002 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2003 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2004 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2005 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2006 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2007 } else { 2008 Mat B; 2009 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2010 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2011 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2012 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2013 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2014 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2015 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2016 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2017 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2018 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2019 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2020 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 2021 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2022 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2023 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2024 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2025 } 2026 PetscFunctionReturn(0); 2027 } 2028 2029 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2030 { 2031 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2032 PetscErrorCode ierr; 2033 2034 PetscFunctionBegin; 2035 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2036 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2041 { 2042 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2047 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2052 { 2053 PetscErrorCode ierr; 2054 IS iscol_local; 2055 PetscInt csize; 2056 2057 PetscFunctionBegin; 2058 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2059 if (call == MAT_REUSE_MATRIX) { 2060 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2061 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2062 } else { 2063 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2064 } 2065 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2066 if (call == MAT_INITIAL_MATRIX) { 2067 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2068 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 /* 2074 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2075 in local and then by concatenating the local matrices the end result. 2076 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 2077 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2078 */ 2079 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2080 { 2081 PetscErrorCode ierr; 2082 PetscMPIInt rank,size; 2083 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2084 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2085 Mat M,Mreuse; 2086 MatScalar *vwork,*aa; 2087 MPI_Comm comm; 2088 IS isrow_new, iscol_new; 2089 Mat_SeqBAIJ *aij; 2090 2091 PetscFunctionBegin; 2092 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2093 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2094 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2095 /* The compression and expansion should be avoided. 
It does not
2096      report errors and may change the indices; hence it is buggy. */
2097   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2098   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2099 
2100   if (call == MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
2104   } else {
2105     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
2106   }
2107   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2108   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2109   /*
2110     m - number of local rows
2111     n - number of columns (same on all processors)
2112     rstart - first row in new global matrix generated
2113   */
2114   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2115   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2116   m    = m/bs;
2117   n    = n/bs;
2118 
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2121     ii  = aij->i;
2122     jj  = aij->j;
2123 
2124     /*
2125       Determine the number of non-zeros in the diagonal and off-diagonal
2126       portions of the matrix in order to do correct preallocation
2127     */
2128 
2129     /* first get start and end of "diagonal" columns */
2130     if (csize == PETSC_DECIDE) {
2131       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2132       if (mglobal == n*bs) { /* square matrix */
2133         nlocal = m;
2134       } else {
2135         nlocal = n/size + ((n % size) > rank);
2136       }
2137     } else {
2138       nlocal = csize/bs;
2139     }
2140     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2141     rstart = rend - nlocal;
2142     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2143 
2144     /* next, compute all the lengths */
2145     ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2146     for (i=0; i<m; i++) {
2147       jend = ii[i+1] - ii[i];
2148       olen = 0;
2149       dlen = 0;
2150       for (j=0; j<jend; j++) {
2151         if (*jj < rstart || *jj >= rend) olen++;
2152         else dlen++;
2153         jj++;
2154       }
2155       olens[i] = olen;
2156       dlens[i] = dlen;
2157     }
2158     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2159     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2160     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2163     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2164   } else {
2165     PetscInt ml,nl;
2166 
2167     M    = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172       The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2173       rather than the slower MatSetValues().
2174 */ 2175 M->was_assembled = PETSC_TRUE; 2176 M->assembled = PETSC_FALSE; 2177 } 2178 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2179 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2180 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2181 ii = aij->i; 2182 jj = aij->j; 2183 aa = aij->a; 2184 for (i=0; i<m; i++) { 2185 row = rstart/bs + i; 2186 nz = ii[i+1] - ii[i]; 2187 cwork = jj; jj += nz; 2188 vwork = aa; aa += nz*bs*bs; 2189 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2190 } 2191 2192 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2193 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2194 *newmat = M; 2195 2196 /* save submatrix used in processor for next request */ 2197 if (call == MAT_INITIAL_MATRIX) { 2198 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2199 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2205 { 2206 MPI_Comm comm,pcomm; 2207 PetscInt clocal_size,nrows; 2208 const PetscInt *rows; 2209 PetscMPIInt size; 2210 IS crowp,lcolp; 2211 PetscErrorCode ierr; 2212 2213 PetscFunctionBegin; 2214 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2215 /* make a collective version of 'rowp' */ 2216 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2217 if (pcomm==comm) { 2218 crowp = rowp; 2219 } else { 2220 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2221 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2222 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2223 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2224 } 2225 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2226 /* make a local version of 'colp' */ 2227 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2228 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2229 if (size==1) { 2230 lcolp = colp; 2231 } else { 2232 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2233 } 2234 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2235 /* now we just get the submatrix */ 2236 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2237 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2238 /* clean up */ 2239 if (pcomm!=comm) { 2240 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2241 } 2242 if (size>1) { 2243 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2244 } 2245 PetscFunctionReturn(0); 2246 } 2247 2248 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2249 { 2250 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2251 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2252 2253 PetscFunctionBegin; 2254 if (nghosts) *nghosts = B->nbs; 2255 if (ghosts) *ghosts = baij->garray; 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2260 { 2261 Mat B; 2262 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2263 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2264 Mat_SeqAIJ *b; 2265 PetscErrorCode ierr; 2266 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2267 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2268 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2269 2270 PetscFunctionBegin; 2271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2272 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2273 2274 /* ---------------------------------------------------------------- 2275 Tell every processor the number of nonzeros per row 2276 */ 2277 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2278 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2279 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2280 } 2281 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2282 displs = recvcounts + size; 2283 for (i=0; i<size; i++) { 2284 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2285 displs[i] = A->rmap->range[i]/bs; 2286 } 2287 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2288 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2289 #else 2290 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2291 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2292 #endif 2293 /* --------------------------------------------------------------- 2294 Create the sequential matrix of the same type as the local block diagonal 2295 */ 2296 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2297 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2298 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2299 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2300 b = (Mat_SeqAIJ*)B->data; 2301 2302 /*-------------------------------------------------------------------- 2303 Copy my part of matrix column indices over 2304 */ 2305 sendcount = ad->nz + bd->nz; 2306 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2307 a_jsendbuf = ad->j; 2308 b_jsendbuf = bd->j; 2309 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2310 cnt = 0; 2311 for (i=0; i<n; i++) { 2312 2313 /* put in lower diagonal portion */ 2314 m = bd->i[i+1] - bd->i[i]; 2315 while (m > 0) { 2316 /* is it above diagonal (in bd (compressed) numbering) */ 2317 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2318 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2319 m--; 2320 } 2321 2322 /* put in diagonal portion */ 2323 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2324 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2325 } 2326 2327 /* put in upper diagonal portion */ 2328 while (m-- > 0) { 2329 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2330 } 2331 } 2332 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2333 2334 /*-------------------------------------------------------------------- 2335 Gather all column indices to all processors 2336 */ 2337 for (i=0; i<size; i++) { 2338 recvcounts[i] = 0; 2339 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2340 recvcounts[i] += lens[j]; 2341 } 2342 } 2343 displs[0] = 0; 2344 for (i=1; i<size; i++) { 2345 displs[i] = displs[i-1] + recvcounts[i-1]; 2346 } 2347 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2348 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2349 #else 2350 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2351 #endif 2352 /*-------------------------------------------------------------------- 2353 Assemble the matrix into useable form (note numerical values not yet set) 2354 */ 
2355 /* set the b->ilen (length of each row) values */ 2356 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2357 /* set the b->i indices */ 2358 b->i[0] = 0; 2359 for (i=1; i<=A->rmap->N/bs; i++) { 2360 b->i[i] = b->i[i-1] + lens[i-1]; 2361 } 2362 ierr = PetscFree(lens);CHKERRQ(ierr); 2363 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2364 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2366 2367 if (A->symmetric) { 2368 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2369 } else if (A->hermitian) { 2370 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2371 } else if (A->structurally_symmetric) { 2372 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2373 } 2374 *newmat = B; 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2379 { 2380 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2381 PetscErrorCode ierr; 2382 Vec bb1 = 0; 2383 2384 PetscFunctionBegin; 2385 if (flag == SOR_APPLY_UPPER) { 2386 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2391 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2392 } 2393 2394 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2395 if (flag & SOR_ZERO_INITIAL_GUESS) { 2396 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2397 its--; 2398 } 2399 2400 while (its--) { 2401 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2402 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2403 2404 /* update rhs: bb1 = bb - B*x */ 2405 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2406 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2407 2408 /* local sweep */ 2409 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2410 } 2411 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2412 if (flag & SOR_ZERO_INITIAL_GUESS) { 2413 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2414 its--; 2415 } 2416 while (its--) { 2417 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2418 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2419 2420 /* update rhs: bb1 = bb - B*x */ 2421 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2422 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2423 2424 /* local sweep */ 2425 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2426 } 2427 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2428 if (flag & SOR_ZERO_INITIAL_GUESS) { 2429 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2430 its--; 2431 } 2432 while (its--) { 2433 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 2436 /* update rhs: bb1 = bb - B*x */ 2437 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2438 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2439 2440 /* local sweep */ 2441 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2442     }
2443   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2444 
2445   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2450 {
2451   PetscErrorCode ierr;
2452   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2453   PetscInt       N,i,*garray = aij->garray;
2454   PetscInt       ib,jb,bs = A->rmap->bs;
2455   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2456   MatScalar      *a_val = a_aij->a;
2457   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2458   MatScalar      *b_val = b_aij->a;
2459   PetscReal      *work;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2463   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2464   if (type == NORM_2) {
2465     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2466       for (jb=0; jb<bs; jb++) {
2467         for (ib=0; ib<bs; ib++) {
2468           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2469           a_val++;
2470         }
2471       }
2472     }
2473     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2477           b_val++;
2478         }
2479       }
2480     }
2481   } else if (type == NORM_1) {
2482     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2486           a_val++;
2487         }
2488       }
2489     }
2490     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2491       for (jb=0; jb<bs; jb++) {
2492         for (ib=0; ib<bs; ib++) {
2493           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2494           b_val++;
2495         }
2496       }
2497     }
2498   } else if (type == NORM_INFINITY) {
2499     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2500       for (jb=0; jb<bs; jb++) {
2501         for (ib=0; ib<bs; ib++) {
2502           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2503           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2504           a_val++;
2505         }
2506       }
2507     }
2508     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2512           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2513           b_val++;
2514         }
2515       }
2516     }
2517   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2518   if (type == NORM_INFINITY) {
2519     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2520   } else {
2521     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2522   }
2523   ierr = PetscFree(work);CHKERRQ(ierr);
2524   if (type == NORM_2) {
2525     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2526   }
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2531 {
2532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2533   PetscErrorCode ierr;
2534 
2535   PetscFunctionBegin;
2536   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2537   A->factorerrortype             = a->A->factorerrortype;
2538   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2539   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatShift_MPIBAIJ(Mat
Y,PetscScalar a) 2544 { 2545 PetscErrorCode ierr; 2546 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2547 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2548 2549 PetscFunctionBegin; 2550 if (!Y->preallocated) { 2551 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2552 } else if (!aij->nz) { 2553 PetscInt nonew = aij->nonew; 2554 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2555 aij->nonew = nonew; 2556 } 2557 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2562 { 2563 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2568 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2569 if (d) { 2570 PetscInt rstart; 2571 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2572 *d += rstart/A->rmap->bs; 2573 2574 } 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2579 { 2580 PetscFunctionBegin; 2581 *a = ((Mat_MPIBAIJ*)A->data)->A; 2582 PetscFunctionReturn(0); 2583 } 2584 2585 /* -------------------------------------------------------------------*/ 2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2587 MatGetRow_MPIBAIJ, 2588 MatRestoreRow_MPIBAIJ, 2589 MatMult_MPIBAIJ, 2590 /* 4*/ MatMultAdd_MPIBAIJ, 2591 MatMultTranspose_MPIBAIJ, 2592 MatMultTransposeAdd_MPIBAIJ, 2593 0, 2594 0, 2595 0, 2596 /*10*/ 0, 2597 0, 2598 0, 2599 MatSOR_MPIBAIJ, 2600 MatTranspose_MPIBAIJ, 2601 /*15*/ MatGetInfo_MPIBAIJ, 2602 MatEqual_MPIBAIJ, 2603 MatGetDiagonal_MPIBAIJ, 2604 MatDiagonalScale_MPIBAIJ, 2605 MatNorm_MPIBAIJ, 2606 /*20*/ MatAssemblyBegin_MPIBAIJ, 2607 MatAssemblyEnd_MPIBAIJ, 2608 MatSetOption_MPIBAIJ, 2609 MatZeroEntries_MPIBAIJ, 2610 /*24*/ MatZeroRows_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*29*/ MatSetUp_MPIBAIJ, 2616 0, 2617 0, 2618 MatGetDiagonalBlock_MPIBAIJ, 2619 0, 2620 /*34*/ MatDuplicate_MPIBAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*39*/ MatAXPY_MPIBAIJ, 2626 MatCreateSubMatrices_MPIBAIJ, 2627 MatIncreaseOverlap_MPIBAIJ, 2628 MatGetValues_MPIBAIJ, 2629 MatCopy_MPIBAIJ, 2630 /*44*/ 0, 2631 MatScale_MPIBAIJ, 2632 MatShift_MPIBAIJ, 2633 0, 2634 MatZeroRowsColumns_MPIBAIJ, 2635 /*49*/ 0, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*54*/ MatFDColoringCreate_MPIXAIJ, 2641 0, 2642 MatSetUnfactored_MPIBAIJ, 2643 MatPermute_MPIBAIJ, 2644 MatSetValuesBlocked_MPIBAIJ, 2645 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2646 MatDestroy_MPIBAIJ, 2647 MatView_MPIBAIJ, 2648 0, 2649 0, 2650 /*64*/ 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*74*/ 0, 2661 MatFDColoringApply_BAIJ, 2662 0, 2663 0, 2664 0, 2665 /*79*/ 0, 2666 0, 2667 0, 2668 0, 2669 MatLoad_MPIBAIJ, 2670 /*84*/ 0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*89*/ 0, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*94*/ 0, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*99*/ 0, 2686 0, 2687 0, 2688 0, 2689 0, 2690 /*104*/0, 2691 MatRealPart_MPIBAIJ, 2692 MatImaginaryPart_MPIBAIJ, 2693 0, 2694 0, 2695 /*109*/0, 2696 0, 2697 0, 2698 0, 2699 MatMissingDiagonal_MPIBAIJ, 2700 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2701 0, 2702 MatGetGhosts_MPIBAIJ, 2703 0, 2704 0, 2705 /*119*/0, 2706 0, 2707 0, 2708 0, 2709 MatGetMultiProcBlock_MPIBAIJ, 2710 /*124*/0, 2711 MatGetColumnNorms_MPIBAIJ, 2712 
MatInvertBlockDiagonal_MPIBAIJ, 2713 0, 2714 0, 2715 /*129*/ 0, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*134*/ 0, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /*139*/ MatSetBlockSizes_Default, 2726 0, 2727 0, 2728 MatFDColoringSetUp_MPIXAIJ, 2729 0, 2730 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2731 }; 2732 2733 2734 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2735 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2736 2737 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2738 { 2739 PetscInt m,rstart,cstart,cend; 2740 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2741 const PetscInt *JJ =0; 2742 PetscScalar *values=0; 2743 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2748 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2749 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2750 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2751 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2752 m = B->rmap->n/bs; 2753 rstart = B->rmap->rstart/bs; 2754 cstart = B->cmap->rstart/bs; 2755 cend = B->cmap->rend/bs; 2756 2757 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2758 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2759 for (i=0; i<m; i++) { 2760 nz = ii[i+1] - ii[i]; 2761 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2762 nz_max = PetscMax(nz_max,nz); 2763 dlen = 0; 2764 olen = 0; 2765 JJ = jj + ii[i]; 2766 for (j=0; j<nz; j++) { 2767 if (*JJ < cstart || *JJ >= cend) olen++; 2768 else dlen++; 2769 JJ++; 2770 } 2771 d_nnz[i] = dlen; 2772 o_nnz[i] = olen; 2773 } 2774 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2775 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2776 2777 values = (PetscScalar*)V; 2778 if (!values) { 2779 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2780 } 2781 for (i=0; i<m; i++) { 2782 PetscInt row = i + rstart; 2783 PetscInt ncols = ii[i+1] - ii[i]; 2784 const PetscInt *icols = jj + ii[i]; 2785 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2786 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2787 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2788 } else { /* block ordering does not match so we can only insert one block at a time. */ 2789 PetscInt j; 2790 for (j=0; j<ncols; j++) { 2791 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2792 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 } 2795 } 2796 2797 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2798 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2800 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 /*@C 2805 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2806 (the default parallel PETSc format). 2807 2808 Collective on MPI_Comm 2809 2810 Input Parameters: 2811 + B - the matrix 2812 . bs - the block size 2813 . 
i - the indices into j for the start of each local row (starts with zero) 2814 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2815 - v - optional values in the matrix 2816 2817 Level: developer 2818 2819 Notes: 2820 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2821 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2822 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2823 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2824 block column and the second index is over columns within a block. 2825 2826 .keywords: matrix, aij, compressed row, sparse, parallel 2827 2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2829 @*/ 2830 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2831 { 2832 PetscErrorCode ierr; 2833 2834 PetscFunctionBegin; 2835 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2836 PetscValidType(B,1); 2837 PetscValidLogicalCollectiveInt(B,bs,2); 2838 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2843 { 2844 Mat_MPIBAIJ *b; 2845 PetscErrorCode ierr; 2846 PetscInt i; 2847 2848 PetscFunctionBegin; 2849 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2850 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2851 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2852 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2853 2854 if (d_nnz) { 2855 for (i=0; i<B->rmap->n/bs; i++) { 2856 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2857 } 2858 } 2859 if (o_nnz) { 2860 for (i=0; i<B->rmap->n/bs; i++) { 2861 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2862 } 2863 } 2864 2865 b = (Mat_MPIBAIJ*)B->data; 2866 b->bs2 = bs*bs; 2867 b->mbs = B->rmap->n/bs; 2868 b->nbs = B->cmap->n/bs; 2869 b->Mbs = B->rmap->N/bs; 2870 b->Nbs = B->cmap->N/bs; 2871 2872 for (i=0; i<=b->size; i++) { 2873 b->rangebs[i] = B->rmap->range[i]/bs; 2874 } 2875 b->rstartbs = B->rmap->rstart/bs; 2876 b->rendbs = B->rmap->rend/bs; 2877 b->cstartbs = B->cmap->rstart/bs; 2878 b->cendbs = B->cmap->rend/bs; 2879 2880 #if defined(PETSC_USE_CTABLE) 2881 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2882 #else 2883 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2884 #endif 2885 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2886 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2887 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2888 2889 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2890 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2891 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2892 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2893 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2894 ierr = 
PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2895 2896 if (!B->preallocated) { 2897 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2898 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2899 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2901 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2902 } 2903 2904 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2905 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2906 B->preallocated = PETSC_TRUE; 2907 B->was_assembled = PETSC_FALSE; 2908 B->assembled = PETSC_FALSE; 2909 PetscFunctionReturn(0); 2910 } 2911 2912 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2913 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2914 2915 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2916 { 2917 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2918 PetscErrorCode ierr; 2919 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2920 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2921 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2922 2923 PetscFunctionBegin; 2924 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2925 ii[0] = 0; 2926 for (i=0; i<M; i++) { 2927 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2928 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2929 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2930 /* remove one from count of matrix has diagonal */ 2931 for (j=id[i]; j<id[i+1]; j++) { 2932 if (jd[j] == i) {ii[i+1]--;break;} 2933 } 2934 } 2935 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2936 cnt = 0; 2937 for (i=0; i<M; i++) { 2938 for (j=io[i]; j<io[i+1]; j++) { 2939 if (garray[jo[j]] > rstart) break; 2940 jj[cnt++] = garray[jo[j]]; 2941 } 2942 for (k=id[i]; k<id[i+1]; k++) { 2943 if (jd[k] != i) { 2944 jj[cnt++] = rstart + jd[k]; 2945 } 2946 } 2947 for (; j<io[i+1]; j++) { 2948 jj[cnt++] = garray[jo[j]]; 2949 } 2950 } 2951 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2956 2957 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2958 2959 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2960 { 2961 PetscErrorCode ierr; 2962 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2963 Mat B; 2964 Mat_MPIAIJ *b; 2965 2966 PetscFunctionBegin; 2967 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2968 2969 if (reuse == MAT_REUSE_MATRIX) { 2970 B = *newmat; 2971 } else { 2972 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2973 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2974 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2975 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2976 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2977 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2978 } 2979 b = (Mat_MPIAIJ*) B->data; 2980 2981 if (reuse == 
MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save time when constructing the matrix

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save time when constructing the matrix

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
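   Example usage (a minimal sketch; the block size and the per-block-row counts
   below are illustrative only and should be chosen to match the actual sparsity
   pattern; error checking is omitted for brevity):

.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   /* 2x2 blocks; at most 3 diagonal-part and 1 off-diagonal-part block nonzeros per block row */
   MatMPIBAIJSetPreallocation(B,2,3,NULL,1,NULL);
.ve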
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table (with size factor <fact>) to save time when constructing the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
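   Example usage (a brief sketch; the block size and per-block-row counts are
   illustrative only; error checking is omitted for brevity):

.vb
   Mat A;
   /* 3x3 blocks; global sizes derived from the local m and n */
   MatCreateBAIJ(comm,3,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);
.ve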
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the parts for all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the parts for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
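   For reference, a hedged sketch of the standard (portable) call that this direct
   Fortran entry point bypasses; the argument names mirror the parameters above:

.vb
   MatSetValuesBlocked(mat,m,im,n,in,v,ADD_VALUES);
.ve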
   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If the number of columns (or rows, in column-oriented mode) is 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, etc. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and the entries of i are offsets into the local j array.
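   Example usage (a minimal sketch for bs = 1, the only supported block size; the
   two-local-row CSR triple below is purely illustrative and the array names ii,
   jj, aa are placeholders):

.vb
   PetscInt    ii[] = {0,2,3};         /* row offsets for 2 local rows   */
   PetscInt    jj[] = {0,1,1};         /* global column indices          */
   PetscScalar aa[] = {2.0,-1.0,3.0};  /* values, copied by this routine */
   Mat         A;
   MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
.ve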
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs  = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      nbs  = n;
      ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
      n    = nbs*cbs;
    } else {
      nbs = n/cbs;
    }

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
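/*
   Usage note (a hedged sketch, not part of the library source): the routine above
   is normally reached through the type-independent dispatcher
   MatCreateMPIMatConcatenateSeqMat(), which stacks each rank's sequential matrix
   into one distributed matrix. Assuming "seq" is an assembled MATSEQBAIJ matrix
   on every rank of comm, a typical call might look like

     Mat dist;
     ierr = MatCreateMPIMatConcatenateSeqMat(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   with PETSC_DECIDE letting PETSc split the column ownership (the symbolic phase
   above does this via PetscSplitOwnership()).
*/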