
#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
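     Illustrative example (hypothetical numbers): with block size bs = 2 and
   garray = {3, 7} (the global block columns present in the off-diagonal part B),
   MatCreateColmap_MPIBAIJ_Private() stores colmap[3] = 0*bs+1 = 1 and
   colmap[7] = 1*bs+1 = 3, i.e. the local point-column offset plus one, while all
   other entries remain 0 to mark block columns not present locally. A later lookup
   of global point column 15 (block column 15/bs = 7) in MatSetValues_MPIBAIJ()
   recovers colmap[7]-1 + 15%bs = 2 + 1 = 3 as the local column within B.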
76 */ 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 79 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 PetscErrorCode ierr; 84 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 85 86 PetscFunctionBegin; 87 #if defined(PETSC_USE_CTABLE) 88 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 89 for (i=0; i<nbs; i++) { 90 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 91 } 92 #else 93 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 94 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 95 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 \ 104 brow = row/bs; \ 105 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 106 rmax = aimax[brow]; nrow = ailen[brow]; \ 107 bcol = col/bs; \ 108 ridx = row % bs; cidx = col % bs; \ 109 low = 0; high = nrow; \ 110 while (high-low > 3) { \ 111 t = (low+high)/2; \ 112 if (rp[t] > bcol) high = t; \ 113 else low = t; \ 114 } \ 115 for (_i=low; _i<high; _i++) { \ 116 if (rp[_i] > bcol) break; \ 117 if (rp[_i] == bcol) { \ 118 bap = ap + bs2*_i + bs*cidx + ridx; \ 119 if (addv == ADD_VALUES) *bap += value; \ 120 else *bap = value; \ 121 goto a_noinsert; \ 122 } \ 123 } \ 124 if (a->nonew == 1) goto a_noinsert; \ 125 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 126 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 127 N = nrow++ - 1; \ 128 /* shift up all the later entries in this row */ \ 129 for (ii=N; ii>=_i; ii--) { \ 130 rp[ii+1] = rp[ii]; \ 131 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 132 } \ 133 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 134 rp[_i] = bcol; \ 135 ap[bs2*_i + bs*cidx + ridx] = value; \ 136 a_noinsert:; \ 137 ailen[brow] = nrow; \ 138 } 139 140 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 141 { \ 142 brow = row/bs; \ 143 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 144 rmax = bimax[brow]; nrow = bilen[brow]; \ 145 bcol = col/bs; \ 146 ridx = row % bs; cidx = col % bs; \ 147 low = 0; high = nrow; \ 148 while (high-low > 3) { \ 149 t = (low+high)/2; \ 150 if (rp[t] > bcol) high = t; \ 151 else low = t; \ 152 } \ 153 for (_i=low; _i<high; _i++) { \ 154 if (rp[_i] > bcol) break; \ 155 if (rp[_i] == bcol) { \ 156 bap = ap + bs2*_i + bs*cidx + ridx; \ 157 if (addv == ADD_VALUES) *bap += value; \ 158 else *bap = value; \ 159 goto b_noinsert; \ 160 } \ 161 } \ 162 if (b->nonew == 1) goto b_noinsert; \ 163 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 164 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 165 N = nrow++ - 1; \ 166 /* shift up all the later entries in this row */ \ 167 for (ii=N; ii>=_i; ii--) { \ 168 rp[ii+1] = rp[ii]; \ 169 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 170 } \ 171 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 172 rp[_i] = bcol; \ 173 ap[bs2*_i + bs*cidx + ridx] = value; \ 174 b_noinsert:; \ 175 bilen[brow] = nrow; \ 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatSetValues_MPIBAIJ" 180 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 181 { 182 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 183 MatScalar value; 184 PetscBool roworiented = baij->roworiented; 185 PetscErrorCode ierr; 186 PetscInt i,j,row,col; 187 PetscInt rstart_orig=mat->rmap->rstart; 188 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 189 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 190 191 /* Some Variables required in the macro */ 192 Mat A = baij->A; 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 194 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 195 MatScalar *aa =a->a; 196 197 Mat B = baij->B; 198 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 199 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 200 MatScalar *ba =b->a; 201 202 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 203 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 204 MatScalar *ap,*bap; 205 206 PetscFunctionBegin; 207 for (i=0; i<m; i++) { 208 if (im[i] < 0) continue; 209 #if defined(PETSC_USE_DEBUG) 210 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 211 #endif 212 if (im[i] >= rstart_orig && im[i] < rend_orig) { 213 row = im[i] - rstart_orig; 214 for (j=0; j<n; j++) { 215 if (in[j] >= cstart_orig && in[j] < cend_orig) { 216 col = in[j] - cstart_orig; 217 if (roworiented) value = v[i*n+j]; 218 else value = v[i+j*m]; 219 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 220 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 221 } else if (in[j] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 224 #endif 225 else { 226 if (mat->was_assembled) { 227 if (!baij->colmap) { 228 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 229 } 230 #if defined(PETSC_USE_CTABLE) 231 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 232 col = col - 1; 233 #else 234 col = baij->colmap[in[j]/bs] - 1; 235 #endif 236 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 237 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 238 col = in[j]; 239 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 240 B = baij->B; 241 b = (Mat_SeqBAIJ*)(B)->data; 242 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 243 ba =b->a; 244 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 245 else col += in[j]%bs; 246 } else col = in[j]; 247 if (roworiented) value = v[i*n+j]; 248 else value = v[i+j*m]; 249 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 250 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 251 } 252 } 253 } else { 254 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 255 if (!baij->donotstash) { 256 mat->assembled = PETSC_FALSE; 257 if (roworiented) { 258 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 259 } else { 260 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 261 } 262 } 263 } 264 } 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 270 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 271 { 272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 273 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 274 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 275 PetscErrorCode ierr; 276 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 277 PetscBool roworiented=a->roworiented; 278 const PetscScalar *value = v; 279 MatScalar *ap,*aa = a->a,*bap; 280 281 PetscFunctionBegin; 282 rp = aj + ai[row]; 283 ap = aa + bs2*ai[row]; 284 rmax = imax[row]; 285 nrow = ailen[row]; 286 low = 0; 287 high = nrow; 288 value = v; 289 low = 0; 290 high = nrow; 291 while (high-low > 7) { 292 t = (low+high)/2; 293 if (rp[t] > col) high = t; 294 else low = t; 295 } 296 for (i=low; i<high; i++) { 297 if (rp[i] > col) break; 298 if (rp[i] == col) { 299 bap = ap + bs2*i; 300 if (roworiented) { 301 if (is == ADD_VALUES) { 302 for (ii=0; ii<bs; ii++) { 303 for (jj=ii; jj<bs2; jj+=bs) { 304 bap[jj] += *value++; 305 } 306 } 307 } else { 308 for (ii=0; ii<bs; ii++) { 309 for (jj=ii; jj<bs2; jj+=bs) { 310 bap[jj] = *value++; 311 } 312 } 313 } 314 } else { 315 if (is == ADD_VALUES) { 316 for (ii=0; ii<bs; ii++,value+=bs) { 317 for (jj=0; jj<bs; jj++) { 318 bap[jj] += value[jj]; 319 } 320 bap += bs; 321 } 322 } else { 323 for (ii=0; ii<bs; ii++,value+=bs) { 324 for (jj=0; jj<bs; jj++) { 325 bap[jj] = value[jj]; 326 } 327 bap += bs; 328 } 329 } 330 } 331 goto noinsert2; 332 } 333 } 334 if (nonew == 1) goto noinsert2; 335 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 336 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 337 N = nrow++ - 1; high++; 338 /* shift up all the later entries in this row */ 339 for (ii=N; ii>=i; ii--) { 340 rp[ii+1] = rp[ii]; 341 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 342 } 343 if (N >= i) { 344 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 345 } 346 rp[i] = col; 347 bap = ap + bs2*i; 348 if (roworiented) { 349 for (ii=0; ii<bs; ii++) { 350 for (jj=ii; jj<bs2; jj+=bs) { 351 bap[jj] = *value++; 352 } 353 } 354 } else { 355 for (ii=0; ii<bs; ii++) { 356 for (jj=0; jj<bs; jj++) { 357 *bap++ = *value++; 358 } 359 } 360 } 361 noinsert2:; 362 ailen[row] = nrow; 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 368 /* 369 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 370 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 371 */ 372 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 373 { 374 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 375 const PetscScalar *value; 376 MatScalar *barray = baij->barray; 377 PetscBool roworiented = baij->roworiented; 378 PetscErrorCode ierr; 379 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 
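  /* Layout note (illustrative example, hypothetical sizes): v[] holds an
     (m*bs) x (n*bs) dense array of blocks. With row-oriented storage,
     stepval = (n-1)*bs below, so block (i,j) starts at
     v + (i*(stepval+bs)+j)*bs = v + i*n*bs*bs + j*bs and successive rows of that
     block are bs+stepval = n*bs entries apart. For instance, with bs = 2, m = 1,
     n = 2 the second block (j = 1) consists of v[2],v[3] (first row) and
     v[6],v[7] (second row), which the copy loop below gathers into the contiguous
     bs2-sized workspace barray. */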
380 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 381 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 382 383 PetscFunctionBegin; 384 if (!barray) { 385 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 386 baij->barray = barray; 387 } 388 389 if (roworiented) stepval = (n-1)*bs; 390 else stepval = (m-1)*bs; 391 392 for (i=0; i<m; i++) { 393 if (im[i] < 0) continue; 394 #if defined(PETSC_USE_DEBUG) 395 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 396 #endif 397 if (im[i] >= rstart && im[i] < rend) { 398 row = im[i] - rstart; 399 for (j=0; j<n; j++) { 400 /* If NumCol = 1 then a copy is not required */ 401 if ((roworiented) && (n == 1)) { 402 barray = (MatScalar*)v + i*bs2; 403 } else if ((!roworiented) && (m == 1)) { 404 barray = (MatScalar*)v + j*bs2; 405 } else { /* Here a copy is required */ 406 if (roworiented) { 407 value = v + (i*(stepval+bs) + j)*bs; 408 } else { 409 value = v + (j*(stepval+bs) + i)*bs; 410 } 411 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 412 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 413 barray += bs; 414 } 415 barray -= bs2; 416 } 417 418 if (in[j] >= cstart && in[j] < cend) { 419 col = in[j] - cstart; 420 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 421 } else if (in[j] < 0) continue; 422 #if defined(PETSC_USE_DEBUG) 423 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 424 #endif 425 else { 426 if (mat->was_assembled) { 427 if (!baij->colmap) { 428 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 431 #if defined(PETSC_USE_DEBUG) 432 #if defined(PETSC_USE_CTABLE) 433 { PetscInt data; 434 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 435 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 436 } 437 #else 438 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 439 #endif 440 #endif 441 #if defined(PETSC_USE_CTABLE) 442 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 443 col = (col - 1)/bs; 444 #else 445 col = (baij->colmap[in[j]] - 1)/bs; 446 #endif 447 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 448 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 449 col = in[j]; 450 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 451 } else col = in[j]; 452 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 453 } 454 } 455 } else { 456 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 457 if (!baij->donotstash) { 458 if (roworiented) { 459 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 460 } else { 461 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 PetscFunctionReturn(0); 467 } 468 469 #define HASH_KEY 0.6180339887 470 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 471 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 472 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 473 #undef 
__FUNCT__ 474 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 475 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 476 { 477 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 478 PetscBool roworiented = baij->roworiented; 479 PetscErrorCode ierr; 480 PetscInt i,j,row,col; 481 PetscInt rstart_orig=mat->rmap->rstart; 482 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 483 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 484 PetscReal tmp; 485 MatScalar **HD = baij->hd,value; 486 #if defined(PETSC_USE_DEBUG) 487 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 488 #endif 489 490 PetscFunctionBegin; 491 for (i=0; i<m; i++) { 492 #if defined(PETSC_USE_DEBUG) 493 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 494 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 495 #endif 496 row = im[i]; 497 if (row >= rstart_orig && row < rend_orig) { 498 for (j=0; j<n; j++) { 499 col = in[j]; 500 if (roworiented) value = v[i*n+j]; 501 else value = v[i+j*m]; 502 /* Look up PetscInto the Hash Table */ 503 key = (row/bs)*Nbs+(col/bs)+1; 504 h1 = HASH(size,key,tmp); 505 506 507 idx = h1; 508 #if defined(PETSC_USE_DEBUG) 509 insert_ct++; 510 total_ct++; 511 if (HT[idx] != key) { 512 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 513 if (idx == size) { 514 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 515 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 516 } 517 } 518 #else 519 if (HT[idx] != key) { 520 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 521 if (idx == size) { 522 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 523 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 524 } 525 } 526 #endif 527 /* A HASH table entry is found, so insert the values at the correct address */ 528 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 529 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 530 } 531 } else if (!baij->donotstash) { 532 if (roworiented) { 533 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 534 } else { 535 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 536 } 537 } 538 } 539 #if defined(PETSC_USE_DEBUG) 540 baij->ht_total_ct = total_ct; 541 baij->ht_insert_ct = insert_ct; 542 #endif 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 548 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 549 { 550 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 551 PetscBool roworiented = baij->roworiented; 552 PetscErrorCode ierr; 553 PetscInt i,j,ii,jj,row,col; 554 PetscInt rstart=baij->rstartbs; 555 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 556 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 557 PetscReal tmp; 558 MatScalar **HD = baij->hd,*baij_a; 559 const PetscScalar *v_t,*value; 560 #if defined(PETSC_USE_DEBUG) 561 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 562 #endif 563 564 PetscFunctionBegin; 565 if (roworiented) stepval = (n-1)*bs; 566 else stepval 
= (m-1)*bs; 567 568 for (i=0; i<m; i++) { 569 #if defined(PETSC_USE_DEBUG) 570 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 571 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 572 #endif 573 row = im[i]; 574 v_t = v + i*nbs2; 575 if (row >= rstart && row < rend) { 576 for (j=0; j<n; j++) { 577 col = in[j]; 578 579 /* Look up into the Hash Table */ 580 key = row*Nbs+col+1; 581 h1 = HASH(size,key,tmp); 582 583 idx = h1; 584 #if defined(PETSC_USE_DEBUG) 585 total_ct++; 586 insert_ct++; 587 if (HT[idx] != key) { 588 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 589 if (idx == size) { 590 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 591 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 592 } 593 } 594 #else 595 if (HT[idx] != key) { 596 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 597 if (idx == size) { 598 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 599 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 600 } 601 } 602 #endif 603 baij_a = HD[idx]; 604 if (roworiented) { 605 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 606 /* value = v + (i*(stepval+bs)+j)*bs; */ 607 value = v_t; 608 v_t += bs; 609 if (addv == ADD_VALUES) { 610 for (ii=0; ii<bs; ii++,value+=stepval) { 611 for (jj=ii; jj<bs2; jj+=bs) { 612 baij_a[jj] += *value++; 613 } 614 } 615 } else { 616 for (ii=0; ii<bs; ii++,value+=stepval) { 617 for (jj=ii; jj<bs2; jj+=bs) { 618 baij_a[jj] = *value++; 619 } 620 } 621 } 622 } else { 623 value = v + j*(stepval+bs)*bs + i*bs; 624 if (addv == ADD_VALUES) { 625 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 626 for (jj=0; jj<bs; jj++) { 627 baij_a[jj] += *value++; 628 } 629 } 630 } else { 631 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 632 for (jj=0; jj<bs; jj++) { 633 baij_a[jj] = *value++; 634 } 635 } 636 } 637 } 638 } 639 } else { 640 if (!baij->donotstash) { 641 if (roworiented) { 642 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 643 } else { 644 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 645 } 646 } 647 } 648 } 649 #if defined(PETSC_USE_DEBUG) 650 baij->ht_total_ct = total_ct; 651 baij->ht_insert_ct = insert_ct; 652 #endif 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatGetValues_MPIBAIJ" 658 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 659 { 660 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 661 PetscErrorCode ierr; 662 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 663 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 664 665 PetscFunctionBegin; 666 for (i=0; i<m; i++) { 667 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 668 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 669 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 670 row = idxm[i] - bsrstart; 671 for (j=0; j<n; j++) { 672 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 673 if (idxn[j] >= mat->cmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 674 if (idxn[j] >= bscstart && idxn[j] < bscend) { 675 col = idxn[j] - bscstart; 676 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 677 } else { 678 if (!baij->colmap) { 679 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 680 } 681 #if defined(PETSC_USE_CTABLE) 682 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 683 data--; 684 #else 685 data = baij->colmap[idxn[j]/bs]-1; 686 #endif 687 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 688 else { 689 col = data + idxn[j]%bs; 690 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 691 } 692 } 693 } 694 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 695 } 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatNorm_MPIBAIJ" 701 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 704 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 705 PetscErrorCode ierr; 706 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 707 PetscReal sum = 0.0; 708 MatScalar *v; 709 710 PetscFunctionBegin; 711 if (baij->size == 1) { 712 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 713 } else { 714 if (type == NORM_FROBENIUS) { 715 v = amat->a; 716 nz = amat->nz*bs2; 717 for (i=0; i<nz; i++) { 718 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 719 } 720 v = bmat->a; 721 nz = bmat->nz*bs2; 722 for (i=0; i<nz; i++) { 723 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 724 } 725 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 726 *nrm = PetscSqrtReal(*nrm); 727 } else if (type == NORM_1) { /* max column sum */ 728 PetscReal *tmp,*tmp2; 729 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 730 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 731 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 732 v = amat->a; jj = amat->j; 733 for (i=0; i<amat->nz; i++) { 734 for (j=0; j<bs; j++) { 735 col = bs*(cstart + *jj) + j; /* column index */ 736 for (row=0; row<bs; row++) { 737 tmp[col] += PetscAbsScalar(*v); v++; 738 } 739 } 740 jj++; 741 } 742 v = bmat->a; jj = bmat->j; 743 for (i=0; i<bmat->nz; i++) { 744 for (j=0; j<bs; j++) { 745 col = bs*garray[*jj] + j; 746 for (row=0; row<bs; row++) { 747 tmp[col] += PetscAbsScalar(*v); v++; 748 } 749 } 750 jj++; 751 } 752 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 753 *nrm = 0.0; 754 for (j=0; j<mat->cmap->N; j++) { 755 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 756 } 757 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 758 } else if (type == NORM_INFINITY) { /* max row sum */ 759 PetscReal *sums; 760 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 761 sum = 0.0; 762 for (j=0; j<amat->mbs; j++) { 763 for (row=0; row<bs; row++) sums[row] = 0.0; 764 v = amat->a + bs2*amat->i[j]; 765 nz = amat->i[j+1]-amat->i[j]; 766 for (i=0; i<nz; i++) { 767 for (col=0; col<bs; col++) { 768 for (row=0; row<bs; row++) { 769 sums[row] += PetscAbsScalar(*v); v++; 770 } 771 } 772 } 773 v = bmat->a + bs2*bmat->i[j]; 774 nz = bmat->i[j+1]-bmat->i[j]; 775 for (i=0; i<nz; i++) { 776 for (col=0; col<bs; col++) { 777 for (row=0; row<bs; row++) { 778 sums[row] += PetscAbsScalar(*v); v++; 779 } 780 } 781 } 782 for 
(row=0; row<bs; row++) { 783 if (sums[row] > sum) sum = sums[row]; 784 } 785 } 786 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 787 ierr = PetscFree(sums);CHKERRQ(ierr); 788 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 789 } 790 PetscFunctionReturn(0); 791 } 792 793 /* 794 Creates the hash table, and sets the table 795 This table is created only once. 796 If new entried need to be added to the matrix 797 then the hash table has to be destroyed and 798 recreated. 799 */ 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 802 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 803 { 804 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805 Mat A = baij->A,B=baij->B; 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 807 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 808 PetscErrorCode ierr; 809 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 810 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 811 PetscInt *HT,key; 812 MatScalar **HD; 813 PetscReal tmp; 814 #if defined(PETSC_USE_INFO) 815 PetscInt ct=0,max=0; 816 #endif 817 818 PetscFunctionBegin; 819 if (baij->ht) PetscFunctionReturn(0); 820 821 baij->ht_size = (PetscInt)(factor*nz); 822 ht_size = baij->ht_size; 823 824 /* Allocate Memory for Hash Table */ 825 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 826 HD = baij->hd; 827 HT = baij->ht; 828 829 /* Loop Over A */ 830 for (i=0; i<a->mbs; i++) { 831 for (j=ai[i]; j<ai[i+1]; j++) { 832 row = i+rstart; 833 col = aj[j]+cstart; 834 835 key = row*Nbs + col + 1; 836 h1 = HASH(ht_size,key,tmp); 837 for (k=0; k<ht_size; k++) { 838 if (!HT[(h1+k)%ht_size]) { 839 HT[(h1+k)%ht_size] = key; 840 HD[(h1+k)%ht_size] = a->a + j*bs2; 841 break; 842 #if defined(PETSC_USE_INFO) 843 } else { 844 ct++; 845 #endif 846 } 847 } 848 #if defined(PETSC_USE_INFO) 849 if (k> max) max = k; 850 #endif 851 } 852 } 853 /* Loop Over B */ 854 for (i=0; i<b->mbs; i++) { 855 for (j=bi[i]; j<bi[i+1]; j++) { 856 row = i+rstart; 857 col = garray[bj[j]]; 858 key = row*Nbs + col + 1; 859 h1 = HASH(ht_size,key,tmp); 860 for (k=0; k<ht_size; k++) { 861 if (!HT[(h1+k)%ht_size]) { 862 HT[(h1+k)%ht_size] = key; 863 HD[(h1+k)%ht_size] = b->a + j*bs2; 864 break; 865 #if defined(PETSC_USE_INFO) 866 } else { 867 ct++; 868 #endif 869 } 870 } 871 #if defined(PETSC_USE_INFO) 872 if (k> max) max = k; 873 #endif 874 } 875 } 876 877 /* Print Summary */ 878 #if defined(PETSC_USE_INFO) 879 for (i=0,j=0; i<ht_size; i++) { 880 if (HT[i]) j++; 881 } 882 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 883 #endif 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 889 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 890 { 891 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 892 PetscErrorCode ierr; 893 PetscInt nstash,reallocs; 894 895 PetscFunctionBegin; 896 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 897 898 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 899 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 900 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 901 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 902 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 903 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 909 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 910 { 911 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 912 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 913 PetscErrorCode ierr; 914 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 915 PetscInt *row,*col; 916 PetscBool r1,r2,r3,other_disassembled; 917 MatScalar *val; 918 PetscMPIInt n; 919 920 PetscFunctionBegin; 921 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 922 if (!baij->donotstash && !mat->nooffprocentries) { 923 while (1) { 924 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 925 if (!flg) break; 926 927 for (i=0; i<n;) { 928 /* Now identify the consecutive vals belonging to the same row */ 929 for (j=i,rstart=row[j]; j<n; j++) { 930 if (row[j] != rstart) break; 931 } 932 if (j < n) ncols = j-i; 933 else ncols = n-i; 934 /* Now assemble all these values with a single function call */ 935 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 936 i = j; 937 } 938 } 939 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 940 /* Now process the block-stash. 
Since the values are stashed column-oriented, 941 set the roworiented flag to column oriented, and after MatSetValues() 942 restore the original flags */ 943 r1 = baij->roworiented; 944 r2 = a->roworiented; 945 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 946 947 baij->roworiented = PETSC_FALSE; 948 a->roworiented = PETSC_FALSE; 949 950 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 951 while (1) { 952 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 953 if (!flg) break; 954 955 for (i=0; i<n;) { 956 /* Now identify the consecutive vals belonging to the same row */ 957 for (j=i,rstart=row[j]; j<n; j++) { 958 if (row[j] != rstart) break; 959 } 960 if (j < n) ncols = j-i; 961 else ncols = n-i; 962 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 963 i = j; 964 } 965 } 966 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 967 968 baij->roworiented = r1; 969 a->roworiented = r2; 970 971 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 972 } 973 974 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 975 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 976 977 /* determine if any processor has disassembled, if so we must 978 also disassemble ourselfs, in order that we may reassemble. */ 979 /* 980 if nonzero structure of submatrix B cannot change then we know that 981 no processor disassembled thus we can skip this stuff 982 */ 983 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 984 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 985 if (mat->was_assembled && !other_disassembled) { 986 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 987 } 988 } 989 990 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 991 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 992 } 993 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 994 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 995 996 #if defined(PETSC_USE_INFO) 997 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 998 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 999 1000 baij->ht_total_ct = 0; 1001 baij->ht_insert_ct = 0; 1002 } 1003 #endif 1004 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1005 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1006 1007 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1008 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1009 } 1010 1011 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1012 1013 baij->rowvalues = 0; 1014 1015 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1016 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 1017 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1018 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 1024 #include <petscdraw.h> 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1027 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1028 { 1029 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 
1030 PetscErrorCode ierr; 1031 PetscMPIInt rank = baij->rank; 1032 PetscInt bs = mat->rmap->bs; 1033 PetscBool iascii,isdraw; 1034 PetscViewer sviewer; 1035 PetscViewerFormat format; 1036 1037 PetscFunctionBegin; 1038 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1039 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1040 if (iascii) { 1041 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1042 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1043 MatInfo info; 1044 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1045 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1046 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1047 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1048 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1049 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1050 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1051 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1052 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1053 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1056 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1057 PetscFunctionReturn(0); 1058 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1059 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1062 PetscFunctionReturn(0); 1063 } 1064 } 1065 1066 if (isdraw) { 1067 PetscDraw draw; 1068 PetscBool isnull; 1069 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1070 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1071 if (isnull) PetscFunctionReturn(0); 1072 } 1073 1074 { 1075 /* assemble the entire matrix onto first processor. */ 1076 Mat A; 1077 Mat_SeqBAIJ *Aloc; 1078 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1079 MatScalar *a; 1080 const char *matname; 1081 1082 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1083 /* Perhaps this should be the type of mat? 
*/ 1084 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1085 if (!rank) { 1086 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1087 } else { 1088 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1089 } 1090 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1091 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1092 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1093 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1094 1095 /* copy over the A part */ 1096 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1097 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1098 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1099 1100 for (i=0; i<mbs; i++) { 1101 rvals[0] = bs*(baij->rstartbs + i); 1102 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1103 for (j=ai[i]; j<ai[i+1]; j++) { 1104 col = (baij->cstartbs+aj[j])*bs; 1105 for (k=0; k<bs; k++) { 1106 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1107 col++; a += bs; 1108 } 1109 } 1110 } 1111 /* copy over the B part */ 1112 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1113 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1114 for (i=0; i<mbs; i++) { 1115 rvals[0] = bs*(baij->rstartbs + i); 1116 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1117 for (j=ai[i]; j<ai[i+1]; j++) { 1118 col = baij->garray[aj[j]]*bs; 1119 for (k=0; k<bs; k++) { 1120 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1121 col++; a += bs; 1122 } 1123 } 1124 } 1125 ierr = PetscFree(rvals);CHKERRQ(ierr); 1126 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1127 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1128 /* 1129 Everyone has to call to draw the matrix since the graphics waits are 1130 synchronized across all processors that share the PetscDraw object 1131 */ 1132 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1133 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1134 if (!rank) { 1135 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1136 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1137 } 1138 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1139 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1140 ierr = MatDestroy(&A);CHKERRQ(ierr); 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1147 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1148 { 1149 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1150 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1151 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1152 PetscErrorCode ierr; 1153 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1154 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1155 int fd; 1156 PetscScalar *column_values; 1157 FILE *file; 1158 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1159 PetscInt message_count,flowcontrolcount; 1160 1161 PetscFunctionBegin; 1162 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1163 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1164 nz = bs2*(A->nz + B->nz); 1165 rlen = mat->rmap->n; 1166 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1167 if (!rank) { 1168 header[0] = MAT_FILE_CLASSID; 1169 header[1] = mat->rmap->N; 1170 
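    /* Note: the binary file begins with a 4-entry header,
       header[] = { MAT_FILE_CLASSID, global rows M, global cols N, global number of
       point nonzeros }, where header[3] is obtained below by summing each rank's
       local count nz = bs2*(A->nz + B->nz) with MPI_Reduce. */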
header[2] = mat->cmap->N; 1171 1172 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 /* get largest number of rows any processor has */ 1175 range = mat->rmap->range; 1176 for (i=1; i<size; i++) { 1177 rlen = PetscMax(rlen,range[i+1] - range[i]); 1178 } 1179 } else { 1180 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 } 1182 1183 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1184 /* compute lengths of each row */ 1185 for (i=0; i<a->mbs; i++) { 1186 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1187 } 1188 /* store the row lengths to the file */ 1189 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1190 if (!rank) { 1191 MPI_Status status; 1192 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1193 rlen = (range[1] - range[0])/bs; 1194 for (i=0; i<rlen; i++) { 1195 for (j=0; j<bs; j++) { 1196 row_lens[i*bs+j] = bs*crow_lens[i]; 1197 } 1198 } 1199 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1200 for (i=1; i<size; i++) { 1201 rlen = (range[i+1] - range[i])/bs; 1202 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1203 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1204 for (k=0; k<rlen; k++) { 1205 for (j=0; j<bs; j++) { 1206 row_lens[k*bs+j] = bs*crow_lens[k]; 1207 } 1208 } 1209 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1210 } 1211 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1212 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1213 } else { 1214 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1215 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1216 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1217 } 1218 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1219 1220 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1221 information needed to make it for each row from a block row. 
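      Illustrative example (hypothetical block row): with bs = 2, a block row whose
      global block columns are {c0 (off-diagonal, before the diagonal part), c1
      (diagonal part)} first yields the point-column list {2*c0, 2*c0+1, 2*c1, 2*c1+1}
      for its first point row; the PetscMemcpy() in the loop below then replicates
      that list bs-1 more times, once for each remaining point row of the block row.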
This does require more communication but still not more than 1222 the communication needed for the nonzero values */ 1223 nzmax = nz; /* space a largest processor needs */ 1224 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1226 cnt = 0; 1227 for (i=0; i<a->mbs; i++) { 1228 pcnt = cnt; 1229 for (j=B->i[i]; j<B->i[i+1]; j++) { 1230 if ((col = garray[B->j[j]]) > cstart) break; 1231 for (l=0; l<bs; l++) { 1232 column_indices[cnt++] = bs*col+l; 1233 } 1234 } 1235 for (k=A->i[i]; k<A->i[i+1]; k++) { 1236 for (l=0; l<bs; l++) { 1237 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1238 } 1239 } 1240 for (; j<B->i[i+1]; j++) { 1241 for (l=0; l<bs; l++) { 1242 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1243 } 1244 } 1245 len = cnt - pcnt; 1246 for (k=1; k<bs; k++) { 1247 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1248 cnt += len; 1249 } 1250 } 1251 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1252 1253 /* store the columns to the file */ 1254 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1255 if (!rank) { 1256 MPI_Status status; 1257 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1258 for (i=1; i<size; i++) { 1259 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1260 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1261 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1262 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1267 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1272 1273 /* load up the numerical values */ 1274 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1275 cnt = 0; 1276 for (i=0; i<a->mbs; i++) { 1277 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1278 for (j=B->i[i]; j<B->i[i+1]; j++) { 1279 if (garray[B->j[j]] > cstart) break; 1280 for (l=0; l<bs; l++) { 1281 for (ll=0; ll<bs; ll++) { 1282 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1283 } 1284 } 1285 cnt += bs; 1286 } 1287 for (k=A->i[i]; k<A->i[i+1]; k++) { 1288 for (l=0; l<bs; l++) { 1289 for (ll=0; ll<bs; ll++) { 1290 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1291 } 1292 } 1293 cnt += bs; 1294 } 1295 for (; j<B->i[i+1]; j++) { 1296 for (l=0; l<bs; l++) { 1297 for (ll=0; ll<bs; ll++) { 1298 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1299 } 1300 } 1301 cnt += bs; 1302 } 1303 cnt += (bs-1)*rlen; 1304 } 1305 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1306 1307 /* store the column values to the file */ 1308 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1309 if (!rank) { 1310 MPI_Status status; 1311 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1312 for (i=1; i<size; i++) { 1313 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1314 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1315 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1316 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1317 } 1318 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1321 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1322 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1323 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1324 } 1325 ierr = PetscFree(column_values);CHKERRQ(ierr); 1326 1327 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1328 if (file) { 1329 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatView_MPIBAIJ" 1336 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1337 { 1338 PetscErrorCode ierr; 1339 PetscBool iascii,isdraw,issocket,isbinary; 1340 1341 PetscFunctionBegin; 1342 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1343 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1344 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1345 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1346 if (iascii || isdraw || issocket) { 1347 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1348 } else if (isbinary) { 1349 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1356 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1357 { 1358 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 #if defined(PETSC_USE_LOG) 1363 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1364 #endif 1365 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1366 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1367 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1368 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1369 #if defined(PETSC_USE_CTABLE) 1370 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1371 #else 1372 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1373 #endif 1374 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1375 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1376 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1377 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1378 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1379 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1380 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1381 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1382 1383 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1384 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1385 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1386 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1387 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1388 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1389 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1390 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1391 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1392 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatMult_MPIBAIJ" 1398 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 PetscInt nt; 1403 1404 PetscFunctionBegin; 1405 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1406 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1407 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1408 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1409 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1410 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1411 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1412 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1418 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1419 { 1420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1426 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1427 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1433 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1434 { 1435 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1436 PetscErrorCode ierr; 1437 PetscBool merged; 1438 1439 PetscFunctionBegin; 1440 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1441 /* do nondiagonal part */ 1442 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1443 if (!merged) { 1444 /* send it on its way */ 1445 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1446 /* do local part */ 1447 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1448 /* receive remote parts: note this assumes the values are not actually */ 1449 /* inserted in yy until the next line */ 1450 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1451 } else { 1452 /* do local part */ 1453 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1454 /* send it on its way */ 1455 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1456 /* values actually were received in the Begin() 
but we need to call this nop */ 1457 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1464 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1465 { 1466 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 /* do nondiagonal part */ 1471 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1472 /* send it on its way */ 1473 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1474 /* do local part */ 1475 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1476 /* receive remote parts: note this assumes the values are not actually */ 1477 /* inserted in yy until the next line, which is true for my implementation*/ 1478 /* but is not perhaps always true. */ 1479 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 /* 1484 This only works correctly for square matrices where the subblock A->A is the 1485 diagonal block 1486 */ 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1489 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1490 { 1491 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1492 PetscErrorCode ierr; 1493 1494 PetscFunctionBegin; 1495 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1496 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatScale_MPIBAIJ" 1502 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1503 { 1504 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1505 PetscErrorCode ierr; 1506 1507 PetscFunctionBegin; 1508 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1509 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1515 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1516 { 1517 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1518 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1519 PetscErrorCode ierr; 1520 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1521 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1522 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1523 1524 PetscFunctionBegin; 1525 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1526 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1527 mat->getrowactive = PETSC_TRUE; 1528 1529 if (!mat->rowvalues && (idx || v)) { 1530 /* 1531 allocate enough space to hold information from the longest row. 
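      The scan below finds the maximum over all block rows of the combined number of
      nonzero blocks in A and B; rowvalues and rowindices are then sized max*bs2 each,
      which is enough to hold any single point row of the matrix.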
1532 */ 1533 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1534 PetscInt max = 1,mbs = mat->mbs,tmp; 1535 for (i=0; i<mbs; i++) { 1536 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1537 if (max < tmp) max = tmp; 1538 } 1539 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1540 } 1541 lrow = row - brstart; 1542 1543 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1544 if (!v) {pvA = 0; pvB = 0;} 1545 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1546 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1547 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1548 nztot = nzA + nzB; 1549 1550 cmap = mat->garray; 1551 if (v || idx) { 1552 if (nztot) { 1553 /* Sort by increasing column numbers, assuming A and B already sorted */ 1554 PetscInt imark = -1; 1555 if (v) { 1556 *v = v_p = mat->rowvalues; 1557 for (i=0; i<nzB; i++) { 1558 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1559 else break; 1560 } 1561 imark = i; 1562 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1563 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1564 } 1565 if (idx) { 1566 *idx = idx_p = mat->rowindices; 1567 if (imark > -1) { 1568 for (i=0; i<imark; i++) { 1569 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1570 } 1571 } else { 1572 for (i=0; i<nzB; i++) { 1573 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1574 else break; 1575 } 1576 imark = i; 1577 } 1578 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1579 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1580 } 1581 } else { 1582 if (idx) *idx = 0; 1583 if (v) *v = 0; 1584 } 1585 } 1586 *nz = nztot; 1587 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1594 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1595 { 1596 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1597 1598 PetscFunctionBegin; 1599 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1600 baij->getrowactive = PETSC_FALSE; 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1606 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1607 { 1608 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1613 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1619 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1620 { 1621 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1622 Mat A = a->A,B = a->B; 1623 PetscErrorCode ierr; 1624 PetscReal isend[5],irecv[5]; 1625 1626 PetscFunctionBegin; 1627 info->block_size = (PetscReal)matin->rmap->bs; 1628 1629 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1630 1631 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1632 isend[3] = info->memory; isend[4] = info->mallocs; 1633 1634 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1635 1636 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1637 isend[3] += info->memory; 
isend[4] += info->mallocs; 1638 1639 if (flag == MAT_LOCAL) { 1640 info->nz_used = isend[0]; 1641 info->nz_allocated = isend[1]; 1642 info->nz_unneeded = isend[2]; 1643 info->memory = isend[3]; 1644 info->mallocs = isend[4]; 1645 } else if (flag == MAT_GLOBAL_MAX) { 1646 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1647 1648 info->nz_used = irecv[0]; 1649 info->nz_allocated = irecv[1]; 1650 info->nz_unneeded = irecv[2]; 1651 info->memory = irecv[3]; 1652 info->mallocs = irecv[4]; 1653 } else if (flag == MAT_GLOBAL_SUM) { 1654 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1655 1656 info->nz_used = irecv[0]; 1657 info->nz_allocated = irecv[1]; 1658 info->nz_unneeded = irecv[2]; 1659 info->memory = irecv[3]; 1660 info->mallocs = irecv[4]; 1661 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1662 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1663 info->fill_ratio_needed = 0; 1664 info->factor_mallocs = 0; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1670 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1671 { 1672 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 switch (op) { 1677 case MAT_NEW_NONZERO_LOCATIONS: 1678 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1679 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680 case MAT_KEEP_NONZERO_PATTERN: 1681 case MAT_NEW_NONZERO_LOCATION_ERR: 1682 MatCheckPreallocated(A,1); 1683 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1684 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1685 break; 1686 case MAT_ROW_ORIENTED: 1687 MatCheckPreallocated(A,1); 1688 a->roworiented = flg; 1689 1690 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1691 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1692 break; 1693 case MAT_NEW_DIAGONALS: 1694 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1695 break; 1696 case MAT_IGNORE_OFF_PROC_ENTRIES: 1697 a->donotstash = flg; 1698 break; 1699 case MAT_USE_HASH_TABLE: 1700 a->ht_flag = flg; 1701 break; 1702 case MAT_SYMMETRIC: 1703 case MAT_STRUCTURALLY_SYMMETRIC: 1704 case MAT_HERMITIAN: 1705 case MAT_SYMMETRY_ETERNAL: 1706 MatCheckPreallocated(A,1); 1707 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1708 break; 1709 default: 1710 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1717 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1718 { 1719 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1720 Mat_SeqBAIJ *Aloc; 1721 Mat B; 1722 PetscErrorCode ierr; 1723 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1724 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1725 MatScalar *a; 1726 1727 PetscFunctionBegin; 1728 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1729 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1730 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1731 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1732 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1733 /* Do not know preallocation information, but must set block size */ 1734 ierr = 
MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1735 } else { 1736 B = *matout; 1737 } 1738 1739 /* copy over the A part */ 1740 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1741 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1742 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1743 1744 for (i=0; i<mbs; i++) { 1745 rvals[0] = bs*(baij->rstartbs + i); 1746 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1747 for (j=ai[i]; j<ai[i+1]; j++) { 1748 col = (baij->cstartbs+aj[j])*bs; 1749 for (k=0; k<bs; k++) { 1750 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1751 1752 col++; a += bs; 1753 } 1754 } 1755 } 1756 /* copy over the B part */ 1757 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1758 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1759 for (i=0; i<mbs; i++) { 1760 rvals[0] = bs*(baij->rstartbs + i); 1761 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1762 for (j=ai[i]; j<ai[i+1]; j++) { 1763 col = baij->garray[aj[j]]*bs; 1764 for (k=0; k<bs; k++) { 1765 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1766 col++; 1767 a += bs; 1768 } 1769 } 1770 } 1771 ierr = PetscFree(rvals);CHKERRQ(ierr); 1772 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1773 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1774 1775 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1776 else { 1777 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1778 } 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1784 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1785 { 1786 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1787 Mat a = baij->A,b = baij->B; 1788 PetscErrorCode ierr; 1789 PetscInt s1,s2,s3; 1790 1791 PetscFunctionBegin; 1792 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1793 if (rr) { 1794 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1795 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1796 /* Overlap communication with computation. 
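       The forward scatter of rr into baij->lvec is only started here; the diagonal block is scaled while the messages are in flight, and the off-diagonal block is right-scaled after VecScatterEnd() below.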
*/ 1797 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 } 1799 if (ll) { 1800 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1801 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1802 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1803 } 1804 /* scale the diagonal block */ 1805 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1806 1807 if (rr) { 1808 /* Do a scatter end and then right scale the off-diagonal block */ 1809 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1810 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1817 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1818 { 1819 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1820 PetscInt *owners = A->rmap->range; 1821 PetscInt n = A->rmap->n; 1822 PetscSF sf; 1823 PetscInt *lrows; 1824 PetscSFNode *rrows; 1825 PetscInt r, p = 0, len = 0; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 /* Create SF where leaves are input rows and roots are owned rows */ 1830 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1831 for (r = 0; r < n; ++r) lrows[r] = -1; 1832 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1833 for (r = 0; r < N; ++r) { 1834 const PetscInt idx = rows[r]; 1835 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1836 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1837 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1838 } 1839 if (A->nooffproczerorows) { 1840 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1841 lrows[len++] = idx - owners[p]; 1842 } else { 1843 rrows[r].rank = p; 1844 rrows[r].index = rows[r] - owners[p]; 1845 } 1846 } 1847 if (!A->nooffproczerorows) { 1848 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1849 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1850 /* Collect flags for rows to be zeroed */ 1851 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1852 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1853 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1854 /* Compress and put in row numbers */ 1855 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1856 } 1857 /* fix right hand side if needed */ 1858 if (x && b) { 1859 const PetscScalar *xx; 1860 PetscScalar *bb; 1861 1862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1864 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1867 } 1868 1869 /* actually zap the local rows */ 1870 /* 1871 Zero the required rows. If the "diagonal block" of the matrix 1872 is square and the user wishes to set the diagonal we use separate 1873 code so that MatSetValues() is not called for each diagonal allocating 1874 new memory, thus calling lots of mallocs and slowing things down. 
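       A minimal caller-side sketch (illustrative only, not part of this routine; x and b may be NULL when no right-hand-side fixup is wanted):

          PetscInt    rows[2] = {0, 5};     global row numbers to zero (example values)
          PetscScalar diag    = 1.0;        value placed on the zeroed diagonal
          ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);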
1875 1876 */ 1877 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1878 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1879 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1880 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1881 } else if (diag != 0.0) { 1882 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1883 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1884 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1885 for (r = 0; r < len; ++r) { 1886 const PetscInt row = lrows[r] + A->rmap->rstart; 1887 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1888 } 1889 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1890 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1891 } else { 1892 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(lrows);CHKERRQ(ierr); 1895 1896 /* only change matrix nonzero state if pattern was allowed to be changed */ 1897 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1898 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1899 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1906 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1907 { 1908 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1909 PetscErrorCode ierr; 1910 PetscMPIInt n = A->rmap->n; 1911 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1912 PetscInt *lrows,*owners = A->rmap->range; 1913 PetscSFNode *rrows; 1914 PetscSF sf; 1915 const PetscScalar *xx; 1916 PetscScalar *bb,*mask; 1917 Vec xmask,lmask; 1918 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1919 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1920 PetscScalar *aa; 1921 1922 PetscFunctionBegin; 1923 /* Create SF where leaves are input rows and roots are owned rows */ 1924 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1925 for (r = 0; r < n; ++r) lrows[r] = -1; 1926 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1927 for (r = 0; r < N; ++r) { 1928 const PetscInt idx = rows[r]; 1929 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1930 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1931 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1932 } 1933 rrows[r].rank = p; 1934 rrows[r].index = rows[r] - owners[p]; 1935 } 1936 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1937 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1938 /* Collect flags for rows to be zeroed */ 1939 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1940 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1941 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1942 /* Compress and put in row numbers */ 1943 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1944 /* zero diagonal part of matrix */ 1945 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1946 /* 
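      build a 0/1 mask over the rows being zeroed, scatter it into the ghosted column layout (lmask), and use it to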
handle off diagonal part of matrix */ 1947 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1948 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1949 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1950 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1951 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1952 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1953 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1954 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1955 if (x) { 1956 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1959 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1960 } 1961 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1962 /* remove zeroed rows of off diagonal matrix */ 1963 for (i = 0; i < len; ++i) { 1964 row = lrows[i]; 1965 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1966 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1967 for (k = 0; k < count; ++k) { 1968 aa[0] = 0.0; 1969 aa += bs; 1970 } 1971 } 1972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1973 for (i = 0; i < l->B->rmap->N; ++i) { 1974 row = i/bs; 1975 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1976 for (k = 0; k < bs; ++k) { 1977 col = bs*baij->j[j] + k; 1978 if (PetscAbsScalar(mask[col])) { 1979 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1980 if (b) bb[i] -= aa[0]*xx[col]; 1981 aa[0] = 0.0; 1982 } 1983 } 1984 } 1985 } 1986 if (x) { 1987 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1988 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1989 } 1990 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1991 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1992 ierr = PetscFree(lrows);CHKERRQ(ierr); 1993 1994 /* only change matrix nonzero state if pattern was allowed to be changed */ 1995 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1996 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1997 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 2004 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 2005 { 2006 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatEqual_MPIBAIJ" 2018 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 2019 { 2020 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 2021 Mat a,b,c,d; 2022 PetscBool flg; 2023 PetscErrorCode ierr; 2024 2025 PetscFunctionBegin; 2026 a = matA->A; b = matA->B; 2027 c = matB->A; d = matB->B; 2028 2029 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2030 if (flg) { 2031 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2032 } 2033 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatCopy_MPIBAIJ" 2039 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 PetscErrorCode ierr; 2042 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 2043 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2044 2045 PetscFunctionBegin; 2046 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2047 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2048 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2049 } else { 2050 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2051 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatSetUp_MPIBAIJ" 2058 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 2059 { 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 2069 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2070 { 2071 PetscErrorCode ierr; 2072 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 2073 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2074 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2075 2076 PetscFunctionBegin; 2077 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatAXPY_MPIBAIJ" 2083 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2084 { 2085 PetscErrorCode ierr; 2086 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 2087 PetscBLASInt bnz,one=1; 2088 Mat_SeqBAIJ *x,*y; 2089 2090 PetscFunctionBegin; 2091 if (str == SAME_NONZERO_PATTERN) { 2092 PetscScalar alpha = a; 2093 x = (Mat_SeqBAIJ*)xx->A->data; 2094 y = (Mat_SeqBAIJ*)yy->A->data; 2095 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2096 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2097 x = (Mat_SeqBAIJ*)xx->B->data; 2098 y = (Mat_SeqBAIJ*)yy->B->data; 2099 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2100 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2101 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2102 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2103 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2104 } else { 2105 Mat B; 2106 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2107 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2108 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2109 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2110 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2111 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2112 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2113 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2114 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2115 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2116 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2117 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
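       For reference, a caller-side sketch that ends up in this branch (illustrative only): ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); which computes Y = Y + alpha*X when the nonzero patterns of X and Y differ.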
*/ 2118 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2119 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2120 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2121 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2128 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2129 { 2130 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2135 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2141 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2142 { 2143 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2148 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2154 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2155 { 2156 PetscErrorCode ierr; 2157 IS iscol_local; 2158 PetscInt csize; 2159 2160 PetscFunctionBegin; 2161 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2162 if (call == MAT_REUSE_MATRIX) { 2163 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2164 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2165 } else { 2166 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2167 } 2168 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2169 if (call == MAT_INITIAL_MATRIX) { 2170 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2171 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2178 /* 2179 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2180 in local and then by concatenating the local matrices the end result. 2181 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 2182 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2183 */ 2184 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2185 { 2186 PetscErrorCode ierr; 2187 PetscMPIInt rank,size; 2188 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2189 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2190 Mat M,Mreuse; 2191 MatScalar *vwork,*aa; 2192 MPI_Comm comm; 2193 IS isrow_new, iscol_new; 2194 PetscBool idflag,allrows, allcols; 2195 Mat_SeqBAIJ *aij; 2196 2197 PetscFunctionBegin; 2198 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2199 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2200 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2201 /* The compression and expansion should be avoided. 
Doesn't point 2202 out errors, might change the indices, hence buggey */ 2203 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2204 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2205 2206 /* Check for special case: each processor gets entire matrix columns */ 2207 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2208 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2209 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2210 else allcols = PETSC_FALSE; 2211 2212 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2213 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2214 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2215 else allrows = PETSC_FALSE; 2216 2217 if (call == MAT_REUSE_MATRIX) { 2218 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2219 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2220 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2221 } else { 2222 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2223 } 2224 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2225 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2226 /* 2227 m - number of local rows 2228 n - number of columns (same on all processors) 2229 rstart - first row in new global matrix generated 2230 */ 2231 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2232 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2233 m = m/bs; 2234 n = n/bs; 2235 2236 if (call == MAT_INITIAL_MATRIX) { 2237 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2238 ii = aij->i; 2239 jj = aij->j; 2240 2241 /* 2242 Determine the number of non-zeros in the diagonal and off-diagonal 2243 portions of the matrix in order to do correct preallocation 2244 */ 2245 2246 /* first get start and end of "diagonal" columns */ 2247 if (csize == PETSC_DECIDE) { 2248 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2249 if (mglobal == n*bs) { /* square matrix */ 2250 nlocal = m; 2251 } else { 2252 nlocal = n/size + ((n % size) > rank); 2253 } 2254 } else { 2255 nlocal = csize/bs; 2256 } 2257 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2258 rstart = rend - nlocal; 2259 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2260 2261 /* next, compute all the lengths */ 2262 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2263 for (i=0; i<m; i++) { 2264 jend = ii[i+1] - ii[i]; 2265 olen = 0; 2266 dlen = 0; 2267 for (j=0; j<jend; j++) { 2268 if (*jj < rstart || *jj >= rend) olen++; 2269 else dlen++; 2270 jj++; 2271 } 2272 olens[i] = olen; 2273 dlens[i] = dlen; 2274 } 2275 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2276 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2277 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2278 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2279 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2280 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2281 } else { 2282 PetscInt ml,nl; 2283 2284 M = *newmat; 2285 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2286 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 2287 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2288 /* 2289 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2290 rather than the slower MatSetValues(). 2291 */ 2292 M->was_assembled = PETSC_TRUE; 2293 M->assembled = PETSC_FALSE; 2294 } 2295 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2296 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2297 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2298 ii = aij->i; 2299 jj = aij->j; 2300 aa = aij->a; 2301 for (i=0; i<m; i++) { 2302 row = rstart/bs + i; 2303 nz = ii[i+1] - ii[i]; 2304 cwork = jj; jj += nz; 2305 vwork = aa; aa += nz*bs*bs; 2306 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2307 } 2308 2309 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2310 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2311 *newmat = M; 2312 2313 /* save submatrix used in processor for next request */ 2314 if (call == MAT_INITIAL_MATRIX) { 2315 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2316 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2317 } 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatPermute_MPIBAIJ" 2323 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2324 { 2325 MPI_Comm comm,pcomm; 2326 PetscInt clocal_size,nrows; 2327 const PetscInt *rows; 2328 PetscMPIInt size; 2329 IS crowp,lcolp; 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2334 /* make a collective version of 'rowp' */ 2335 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2336 if (pcomm==comm) { 2337 crowp = rowp; 2338 } else { 2339 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2340 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2341 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2342 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2343 } 2344 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2345 /* make a local version of 'colp' */ 2346 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2347 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2348 if (size==1) { 2349 lcolp = colp; 2350 } else { 2351 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2352 } 2353 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2354 /* now we just get the submatrix */ 2355 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2356 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2357 /* clean up */ 2358 if (pcomm!=comm) { 2359 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2360 } 2361 if (size>1) { 2362 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2363 } 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2369 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2370 { 2371 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2372 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2373 2374 PetscFunctionBegin; 2375 if (nghosts) *nghosts = B->nbs; 2376 if (ghosts) *ghosts = baij->garray; 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2382 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2383 { 2384 Mat B; 2385 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2386 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2387 Mat_SeqAIJ *b; 2388 PetscErrorCode ierr; 2389 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2390 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2391 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2392 2393 PetscFunctionBegin; 2394 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2395 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2396 2397 /* ---------------------------------------------------------------- 2398 Tell every processor the number of nonzeros per row 2399 */ 2400 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2401 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2402 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2403 } 2404 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2405 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2406 displs = recvcounts + size; 2407 for (i=0; i<size; i++) { 2408 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2409 displs[i] = A->rmap->range[i]/bs; 2410 } 2411 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2412 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2413 #else 2414 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2415 #endif 2416 /* --------------------------------------------------------------- 2417 Create the sequential matrix of the same type as the local block diagonal 2418 */ 2419 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2420 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2421 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2422 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2423 b = (Mat_SeqAIJ*)B->data; 2424 2425 /*-------------------------------------------------------------------- 2426 Copy my part of matrix column indices over 2427 */ 2428 sendcount = ad->nz + bd->nz; 2429 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2430 a_jsendbuf = ad->j; 2431 b_jsendbuf = bd->j; 2432 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2433 cnt = 0; 2434 for (i=0; i<n; i++) { 2435 2436 /* put in lower diagonal portion */ 2437 m = bd->i[i+1] - bd->i[i]; 2438 while (m > 0) { 2439 /* is it above diagonal (in bd (compressed) numbering) */ 2440 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2441 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2442 m--; 2443 } 2444 2445 /* put in diagonal portion */ 2446 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2447 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2448 } 2449 2450 /* put in upper diagonal portion */ 2451 while (m-- > 0) { 2452 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2453 } 2454 } 2455 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2456 2457 /*-------------------------------------------------------------------- 2458 Gather all column indices to all processors 2459 */ 2460 for (i=0; i<size; i++) { 2461 recvcounts[i] = 0; 2462 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2463 recvcounts[i] += lens[j]; 2464 } 2465 } 2466 displs[0] = 0; 2467 for (i=1; i<size; i++) { 2468 displs[i] = displs[i-1] + recvcounts[i-1]; 2469 } 2470 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2471 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2472 #else 2473 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2474 #endif 2475 /*-------------------------------------------------------------------- 2476 Assemble the matrix into useable form (note numerical values not yet set) 2477 */ 2478 /* set the b->ilen (length of each row) values */ 2479 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2480 /* set the b->i indices */ 2481 b->i[0] = 0; 2482 for (i=1; i<=A->rmap->N/bs; i++) { 2483 b->i[i] = b->i[i-1] + lens[i-1]; 2484 } 2485 ierr = PetscFree(lens);CHKERRQ(ierr); 2486 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2489 2490 if (A->symmetric) { 2491 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2492 } else if (A->hermitian) { 2493 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2494 } else if (A->structurally_symmetric) { 2495 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2496 } 2497 *newmat = B; 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSOR_MPIBAIJ" 2503 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2504 { 2505 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2506 PetscErrorCode ierr; 2507 Vec bb1 = 0; 2508 2509 PetscFunctionBegin; 2510 if (flag == SOR_APPLY_UPPER) { 2511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2516 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2517 } 2518 2519 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2520 if (flag & SOR_ZERO_INITIAL_GUESS) { 2521 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2522 its--; 2523 } 2524 2525 while (its--) { 2526 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2528 2529 /* update rhs: bb1 = bb - B*x */ 2530 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2531 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2532 2533 /* local sweep */ 2534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2535 } 2536 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2537 if (flag & SOR_ZERO_INITIAL_GUESS) { 2538 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2539 its--; 2540 } 2541 while (its--) { 2542 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2543 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2544 2545 /* update rhs: bb1 = bb - B*x */ 2546 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2547 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2548 2549 /* local sweep */ 2550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2551 } 2552 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2553 if (flag & SOR_ZERO_INITIAL_GUESS) { 2554 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2555 its--; 2556 } 2557 while (its--) { 2558 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2559 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2560 2561 /* update rhs: bb1 = bb - B*x */ 2562 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2563 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2564 2565 /* local sweep */ 2566 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2567 } 2568 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2569 2570 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 #undef __FUNCT__ 2575 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2576 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2577 { 2578 PetscErrorCode ierr; 2579 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2580 PetscInt N,i,*garray = aij->garray; 2581 PetscInt ib,jb,bs = A->rmap->bs; 2582 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2583 MatScalar *a_val = a_aij->a; 2584 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2585 MatScalar *b_val = b_aij->a; 2586 PetscReal *work; 2587 2588 PetscFunctionBegin; 2589 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2591 if (type == NORM_2) { 2592 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2593 for (jb=0; jb<bs; jb++) { 2594 for (ib=0; ib<bs; ib++) { 2595 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2596 a_val++; 2597 } 2598 } 2599 } 2600 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2601 for (jb=0; jb<bs; jb++) { 2602 for (ib=0; ib<bs; ib++) { 2603 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2604 b_val++; 2605 } 2606 } 2607 } 2608 } else if (type == NORM_1) { 2609 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2610 for (jb=0; jb<bs; jb++) { 2611 for (ib=0; ib<bs; ib++) { 2612 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2613 a_val++; 2614 } 2615 } 2616 } 2617 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2618 for (jb=0; jb<bs; jb++) { 2619 for (ib=0; ib<bs; ib++) { 2620 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2621 b_val++; 2622 } 2623 } 2624 } 2625 } else if (type == NORM_INFINITY) { 2626 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2627 for (jb=0; jb<bs; jb++) { 2628 for (ib=0; ib<bs; ib++) { 2629 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2630 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2631 a_val++; 2632 } 2633 } 2634 } 2635 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2636 for (jb=0; jb<bs; jb++) { 2637 for (ib=0; ib<bs; ib++) { 2638 int col = garray[b_aij->j[i]] * bs + jb; 2639 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2640 b_val++; 2641 } 2642 } 2643 } 2644 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2645 if (type == NORM_INFINITY) { 2646 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2647 } else { 2648 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2649 } 2650 ierr = PetscFree(work);CHKERRQ(ierr); 2651 if (type == NORM_2) { 2652 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2653 } 2654 PetscFunctionReturn(0); 2655 } 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2659 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2660 { 2661 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2662 PetscErrorCode ierr; 2663 2664 PetscFunctionBegin; 2665 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2666 A->errortype = a->A->errortype; 2667 PetscFunctionReturn(0); 2668 } 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatShift_MPIBAIJ" 2672 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2673 { 2674 PetscErrorCode ierr; 2675 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2676 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2681 } else if (!aij->nz) { 2682 PetscInt nonew = aij->nonew; 2683 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2684 aij->nonew = nonew; 2685 } 2686 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatMissingDiagonal_MPIBAIJ" 2692 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2693 { 2694 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2695 PetscErrorCode ierr; 2696 2697 PetscFunctionBegin; 2698 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2699 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2700 if (d) { 2701 PetscInt rstart; 2702 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2703 *d += rstart/A->rmap->bs; 2704 2705 } 2706 PetscFunctionReturn(0); 2707 } 2708 2709 /* -------------------------------------------------------------------*/ 2710 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2711 MatGetRow_MPIBAIJ, 2712 MatRestoreRow_MPIBAIJ, 2713 MatMult_MPIBAIJ, 2714 /* 4*/ MatMultAdd_MPIBAIJ, 2715 MatMultTranspose_MPIBAIJ, 2716 MatMultTransposeAdd_MPIBAIJ, 2717 0, 2718 0, 2719 0, 2720 /*10*/ 0, 2721 0, 2722 0, 2723 MatSOR_MPIBAIJ, 2724 MatTranspose_MPIBAIJ, 2725 /*15*/ MatGetInfo_MPIBAIJ, 2726 MatEqual_MPIBAIJ, 2727 MatGetDiagonal_MPIBAIJ, 2728 MatDiagonalScale_MPIBAIJ, 2729 MatNorm_MPIBAIJ, 2730 /*20*/ MatAssemblyBegin_MPIBAIJ, 2731 MatAssemblyEnd_MPIBAIJ, 2732 MatSetOption_MPIBAIJ, 2733 MatZeroEntries_MPIBAIJ, 2734 /*24*/ MatZeroRows_MPIBAIJ, 2735 0, 2736 0, 2737 0, 2738 0, 2739 /*29*/ MatSetUp_MPIBAIJ, 2740 0, 2741 0, 2742 0, 2743 0, 2744 /*34*/ MatDuplicate_MPIBAIJ, 2745 0, 2746 0, 2747 0, 2748 0, 2749 /*39*/ MatAXPY_MPIBAIJ, 2750 MatGetSubMatrices_MPIBAIJ, 2751 MatIncreaseOverlap_MPIBAIJ, 2752 MatGetValues_MPIBAIJ, 2753 MatCopy_MPIBAIJ, 2754 /*44*/ 0, 2755 MatScale_MPIBAIJ, 2756 MatShift_MPIBAIJ, 2757 0, 2758 MatZeroRowsColumns_MPIBAIJ, 2759 /*49*/ 0, 2760 0, 2761 0, 2762 0, 2763 0, 2764 /*54*/ MatFDColoringCreate_MPIXAIJ, 2765 0, 2766 MatSetUnfactored_MPIBAIJ, 2767 MatPermute_MPIBAIJ, 2768 MatSetValuesBlocked_MPIBAIJ, 2769 /*59*/ MatGetSubMatrix_MPIBAIJ, 2770 MatDestroy_MPIBAIJ, 2771 MatView_MPIBAIJ, 2772 0, 2773 0, 2774 /*64*/ 0, 2775 0, 2776 0, 2777 0, 2778 0, 2779 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2780 0, 2781 0, 2782 0, 2783 0, 2784 /*74*/ 0, 2785 MatFDColoringApply_BAIJ, 2786 0, 2787 0, 2788 0, 2789 /*79*/ 0, 2790 0, 2791 0, 2792 0, 2793 MatLoad_MPIBAIJ, 2794 /*84*/ 0, 2795 0, 2796 0, 2797 0, 2798 0, 2799 /*89*/ 0, 2800 0, 2801 0, 2802 0, 2803 0, 2804 /*94*/ 0, 2805 0, 2806 0, 2807 0, 2808 
0, 2809 /*99*/ 0, 2810 0, 2811 0, 2812 0, 2813 0, 2814 /*104*/0, 2815 MatRealPart_MPIBAIJ, 2816 MatImaginaryPart_MPIBAIJ, 2817 0, 2818 0, 2819 /*109*/0, 2820 0, 2821 0, 2822 0, 2823 MatMissingDiagonal_MPIBAIJ, 2824 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2825 0, 2826 MatGetGhosts_MPIBAIJ, 2827 0, 2828 0, 2829 /*119*/0, 2830 0, 2831 0, 2832 0, 2833 MatGetMultiProcBlock_MPIBAIJ, 2834 /*124*/0, 2835 MatGetColumnNorms_MPIBAIJ, 2836 MatInvertBlockDiagonal_MPIBAIJ, 2837 0, 2838 0, 2839 /*129*/ 0, 2840 0, 2841 0, 2842 0, 2843 0, 2844 /*134*/ 0, 2845 0, 2846 0, 2847 0, 2848 0, 2849 /*139*/ 0, 2850 0, 2851 0, 2852 MatFDColoringSetUp_MPIXAIJ, 2853 0, 2854 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2855 }; 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2859 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2860 { 2861 PetscFunctionBegin; 2862 *a = ((Mat_MPIBAIJ*)A->data)->A; 2863 PetscFunctionReturn(0); 2864 } 2865 2866 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2870 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2871 { 2872 PetscInt m,rstart,cstart,cend; 2873 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2874 const PetscInt *JJ =0; 2875 PetscScalar *values=0; 2876 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2877 PetscErrorCode ierr; 2878 2879 PetscFunctionBegin; 2880 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2881 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2882 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2883 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2884 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2885 m = B->rmap->n/bs; 2886 rstart = B->rmap->rstart/bs; 2887 cstart = B->cmap->rstart/bs; 2888 cend = B->cmap->rend/bs; 2889 2890 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2891 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2892 for (i=0; i<m; i++) { 2893 nz = ii[i+1] - ii[i]; 2894 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2895 nz_max = PetscMax(nz_max,nz); 2896 JJ = jj + ii[i]; 2897 for (j=0; j<nz; j++) { 2898 if (*JJ >= cstart) break; 2899 JJ++; 2900 } 2901 d = 0; 2902 for (; j<nz; j++) { 2903 if (*JJ++ >= cend) break; 2904 d++; 2905 } 2906 d_nnz[i] = d; 2907 o_nnz[i] = nz - d; 2908 } 2909 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2910 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2911 2912 values = (PetscScalar*)V; 2913 if (!values) { 2914 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2915 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2916 } 2917 for (i=0; i<m; i++) { 2918 PetscInt row = i + rstart; 2919 PetscInt ncols = ii[i+1] - ii[i]; 2920 const PetscInt *icols = jj + ii[i]; 2921 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2922 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2923 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2924 } else { /* block ordering does not match so we can only insert one block at a time. */ 2925 PetscInt j; 2926 for (j=0; j<ncols; j++) { 2927 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2928 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2929 } 2930 } 2931 } 2932 2933 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2934 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2935 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2942 /*@C 2943 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2944 (the default parallel PETSc format). 2945 2946 Collective on MPI_Comm 2947 2948 Input Parameters: 2949 + B - the matrix 2950 . bs - the block size 2951 . i - the indices into j for the start of each local row (starts with zero) 2952 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2953 - v - optional values in the matrix 2954 2955 Level: developer 2956 2957 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2958 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2959 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2960 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2961 block column and the second index is over columns within a block. 2962 2963 .keywords: matrix, aij, compressed row, sparse, parallel 2964 2965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2966 @*/ 2967 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2968 { 2969 PetscErrorCode ierr; 2970 2971 PetscFunctionBegin; 2972 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2973 PetscValidType(B,1); 2974 PetscValidLogicalCollectiveInt(B,bs,2); 2975 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2976 PetscFunctionReturn(0); 2977 } 2978 2979 #undef __FUNCT__ 2980 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2981 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2982 { 2983 Mat_MPIBAIJ *b; 2984 PetscErrorCode ierr; 2985 PetscInt i; 2986 2987 PetscFunctionBegin; 2988 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2989 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2990 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2991 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2992 2993 if (d_nnz) { 2994 for (i=0; i<B->rmap->n/bs; i++) { 2995 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2996 } 2997 } 2998 if (o_nnz) { 2999 for (i=0; i<B->rmap->n/bs; i++) { 3000 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3001 } 3002 } 3003 3004 b = (Mat_MPIBAIJ*)B->data; 3005 b->bs2 = bs*bs; 3006 b->mbs = B->rmap->n/bs; 3007 b->nbs = B->cmap->n/bs; 3008 b->Mbs = B->rmap->N/bs; 3009 b->Nbs = B->cmap->N/bs; 3010 3011 for (i=0; i<=b->size; i++) { 3012 
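    /* convert the point-wise ownership ranges of all processes into block (bs-wide) ranges */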
b->rangebs[i] = B->rmap->range[i]/bs; 3013 } 3014 b->rstartbs = B->rmap->rstart/bs; 3015 b->rendbs = B->rmap->rend/bs; 3016 b->cstartbs = B->cmap->rstart/bs; 3017 b->cendbs = B->cmap->rend/bs; 3018 3019 if (!B->preallocated) { 3020 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3021 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3022 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3023 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3024 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3025 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3026 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3027 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3028 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3029 } 3030 3031 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3032 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3033 B->preallocated = PETSC_TRUE; 3034 PetscFunctionReturn(0); 3035 } 3036 3037 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3038 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3042 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3043 { 3044 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3045 PetscErrorCode ierr; 3046 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3047 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3048 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3049 3050 PetscFunctionBegin; 3051 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 3052 ii[0] = 0; 3053 for (i=0; i<M; i++) { 3054 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3055 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3056 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3057 /* remove one from count of matrix has diagonal */ 3058 for (j=id[i]; j<id[i+1]; j++) { 3059 if (jd[j] == i) {ii[i+1]--;break;} 3060 } 3061 } 3062 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 3063 cnt = 0; 3064 for (i=0; i<M; i++) { 3065 for (j=io[i]; j<io[i+1]; j++) { 3066 if (garray[jo[j]] > rstart) break; 3067 jj[cnt++] = garray[jo[j]]; 3068 } 3069 for (k=id[i]; k<id[i+1]; k++) { 3070 if (jd[k] != i) { 3071 jj[cnt++] = rstart + jd[k]; 3072 } 3073 } 3074 for (; j<io[i+1]; j++) { 3075 jj[cnt++] = garray[jo[j]]; 3076 } 3077 } 3078 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3079 PetscFunctionReturn(0); 3080 } 3081 3082 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3083 3084 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3088 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3089 { 3090 PetscErrorCode ierr; 3091 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3092 Mat B; 3093 Mat_MPIAIJ *b; 3094 3095 PetscFunctionBegin; 3096 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3097 3098 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3099 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3100 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3101 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3102 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3103 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3104 b = (Mat_MPIAIJ*) B->data; 3105 3106 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3107 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3108 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3109 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3110 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3111 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3115 if (reuse == MAT_INPLACE_MATRIX) { 3116 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3117 } else { 3118 *newmat = B; 3119 } 3120 PetscFunctionReturn(0); 3121 } 3122 3123 /*MC 3124 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3125 3126 Options Database Keys: 3127 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3128 . -mat_block_size <bs> - set the blocksize used to store the matrix 3129 - -mat_use_hash_table <fact> 3130 3131 Level: beginner 3132 3133 .seealso: MatCreateMPIBAIJ 3134 M*/ 3135 3136 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3137 3138 #undef __FUNCT__ 3139 #define __FUNCT__ "MatCreate_MPIBAIJ" 3140 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3141 { 3142 Mat_MPIBAIJ *b; 3143 PetscErrorCode ierr; 3144 PetscBool flg = PETSC_FALSE; 3145 3146 PetscFunctionBegin; 3147 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3148 B->data = (void*)b; 3149 3150 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3151 B->assembled = PETSC_FALSE; 3152 3153 B->insertmode = NOT_SET_VALUES; 3154 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3155 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3156 3157 /* build local table of row and column ownerships */ 3158 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3159 3160 /* build cache for off array entries formed */ 3161 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3162 3163 b->donotstash = PETSC_FALSE; 3164 b->colmap = NULL; 3165 b->garray = NULL; 3166 b->roworiented = PETSC_TRUE; 3167 3168 /* stuff used in block assembly */ 3169 b->barray = 0; 3170 3171 /* stuff used for matrix vector multiply */ 3172 b->lvec = 0; 3173 b->Mvctx = 0; 3174 3175 /* stuff for MatGetRow() */ 3176 b->rowindices = 0; 3177 b->rowvalues = 0; 3178 b->getrowactive = PETSC_FALSE; 3179 3180 /* hash table stuff */ 3181 b->ht = 0; 3182 b->hd = 0; 3183 b->ht_size = 0; 3184 b->ht_flag = PETSC_FALSE; 3185 b->ht_fact = 0; 3186 b->ht_total_ct = 0; 3187 b->ht_insert_ct = 0; 3188 3189 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3190 b->ijonly = PETSC_FALSE; 3191 3192 3193 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3194 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3195 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3196 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3197 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3198 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3199 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3200 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3201 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3202 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3203 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3204 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3205 3206 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3207 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 3208 if (flg) { 3209 PetscReal fact = 1.39; 3210 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3211 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3212 if (fact <= 1.0) fact = 1.39; 3213 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3214 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3215 } 3216 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /*MC 3221 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3222 3223 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3224 and MATMPIBAIJ otherwise. 3225 3226 Options Database Keys: 3227 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3228 3229 Level: beginner 3230 3231 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3232 M*/ 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3236 /*@C 3237 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3238 (block compressed row). For good matrix assembly performance 3239 the user should preallocate the matrix storage by setting the parameters 3240 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3241 performance can be increased by more than a factor of 50. 3242 3243 Collective on Mat 3244 3245 Input Parameters: 3246 + B - the matrix 3247 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3248 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3249 . 
d_nz - number of block nonzeros per block row in diagonal portion of local 3250 submatrix (same for all local rows) 3251 . d_nnz - array containing the number of block nonzeros in the various block rows 3252 in the diagonal portion of the local submatrix (possibly different for each block 3253 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3254 set it even if it is zero. 3255 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3256 submatrix (same for all local rows). 3257 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3258 off-diagonal portion of the local submatrix (possibly different for 3259 each block row) or NULL. 3260 3261 If the *_nnz parameter is given then the *_nz parameter is ignored 3262 3263 Options Database Keys: 3264 + -mat_block_size - size of the blocks to use 3265 - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly, with <fact> as the hash table factor 3266 3267 Notes: 3268 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3269 then it must be used on all processors that share the object for that argument. 3270 3271 Storage Information: 3272 For a square global matrix we define each processor's diagonal portion 3273 to be its local rows and the corresponding columns (a square submatrix); 3274 each processor's off-diagonal portion encompasses the remainder of the 3275 local matrix (a rectangular submatrix). 3276 3277 The user can specify preallocated storage for the diagonal part of 3278 the local submatrix with either d_nz or d_nnz (not both). Set 3279 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3280 memory allocation. Likewise, specify preallocated storage for the 3281 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3282 3283 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3284 the figure below we depict these three local rows and all columns (0-11). 3285 3286 .vb 3287 0 1 2 3 4 5 6 7 8 9 10 11 3288 -------------------------- 3289 row 3 |o o o d d d o o o o o o 3290 row 4 |o o o d d d o o o o o o 3291 row 5 |o o o d d d o o o o o o 3292 -------------------------- 3293 .ve 3294 3295 Thus, any entries in the d locations are stored in the d (diagonal) 3296 submatrix, and any entries in the o locations are stored in the 3297 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3298 stored simply in the MATSEQBAIJ format for compressed row storage. 3299 3300 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3301 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3302 In general, for PDE problems in which most nonzeros are near the diagonal, 3303 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3304 or you will get TERRIBLE performance; see the users' manual chapter on 3305 matrices. 3306 3307 You can call MatGetInfo() to get information on how effective the preallocation was; 3308 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3309 You can also run with the option -info and look for messages with the string 3310 malloc in them to see if additional memory allocation was needed.
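   A typical calling sequence is sketched below; the communicator comm, the local block dimensions mlocal and nlocal,
   and the contents of the d_nnz[] and o_nnz[] arrays are placeholders chosen only for illustration (real code would
   compute the per-block-row counts from its own mesh or graph):
.vb
     Mat      A;
     PetscInt bs = 2, mlocal = 50, nlocal = 50;   // example local block dimensions
     PetscInt d_nnz[50], o_nnz[50];               // fill with per-block-row counts before calling
     MatCreate(comm,&A);
     MatSetSizes(A,bs*mlocal,bs*nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIBAIJ);
     MatSetFromOptions(A);
     MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve
   Note that MatSetSizes() takes point (not block) dimensions, so the local sizes above are bs*mlocal and bs*nlocal.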
3311 3312 Level: intermediate 3313 3314 .keywords: matrix, block, aij, compressed row, sparse, parallel 3315 3316 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3317 @*/ 3318 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3319 { 3320 PetscErrorCode ierr; 3321 3322 PetscFunctionBegin; 3323 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3324 PetscValidType(B,1); 3325 PetscValidLogicalCollectiveInt(B,bs,2); 3326 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatCreateBAIJ" 3332 /*@C 3333 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3334 (block compressed row). For good matrix assembly performance 3335 the user should preallocate the matrix storage by setting the parameters 3336 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3337 performance can be increased by more than a factor of 50. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + comm - MPI communicator 3343 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3344 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3345 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3346 This value should be the same as the local size used in creating the 3347 y vector for the matrix-vector product y = Ax. 3348 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3349 This value should be the same as the local size used in creating the 3350 x vector for the matrix-vector product y = Ax. 3351 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3352 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3353 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3354 submatrix (same for all local rows) 3355 . d_nnz - array containing the number of nonzero blocks in the various block rows 3356 in the diagonal portion of the local submatrix (possibly different for each block 3357 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3358 and set it even if it is zero. 3359 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3360 submatrix (same for all local rows). 3361 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3362 off-diagonal portion of the local submatrix (possibly different for 3363 each block row) or NULL. 3364 3365 Output Parameter: 3366 . A - the matrix 3367 3368 Options Database Keys: 3369 + -mat_block_size - size of the blocks to use 3370 - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly, with <fact> as the hash table factor 3371 3372 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3373 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3374 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3375 3376 Notes: 3377 If the *_nnz parameter is given then the *_nz parameter is ignored 3378 3379 A nonzero block is any block that has 1 or more nonzeros in it 3380 3381 The user MUST specify either the local or global matrix dimensions 3382 (possibly both). 3383 3384 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3385 then it must be used on all processors that share the object for that argument. 3386 3387 Storage Information: 3388 For a square global matrix we define each processor's diagonal portion 3389 to be its local rows and the corresponding columns (a square submatrix); 3390 each processor's off-diagonal portion encompasses the remainder of the 3391 local matrix (a rectangular submatrix). 3392 3393 The user can specify preallocated storage for the diagonal part of 3394 the local submatrix with either d_nz or d_nnz (not both). Set 3395 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3396 memory allocation. Likewise, specify preallocated storage for the 3397 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3398 3399 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3400 the figure below we depict these three local rows and all columns (0-11). 3401 3402 .vb 3403 0 1 2 3 4 5 6 7 8 9 10 11 3404 -------------------------- 3405 row 3 |o o o d d d o o o o o o 3406 row 4 |o o o d d d o o o o o o 3407 row 5 |o o o d d d o o o o o o 3408 -------------------------- 3409 .ve 3410 3411 Thus, any entries in the d locations are stored in the d (diagonal) 3412 submatrix, and any entries in the o locations are stored in the 3413 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3414 stored simply in the MATSEQBAIJ format for compressed row storage. 3415 3416 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3417 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3418 In general, for PDE problems in which most nonzeros are near the diagonal, 3419 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3420 or you will get TERRIBLE performance; see the users' manual chapter on 3421 matrices.
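   For example, the sketch below (with invented sizes and counts) creates a matrix with 3x3 blocks, allowing roughly 5
   nonzero blocks per block row in the diagonal part and 2 in the off-diagonal part:
.vb
     Mat A;
     MatCreateBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,3000,3000,5,NULL,2,NULL,&A);
.ve
   The global sizes passed to MatCreateBAIJ() are point sizes (here 3000 = 1000 block rows of size 3), not block counts.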
3422 3423 Level: intermediate 3424 3425 .keywords: matrix, block, aij, compressed row, sparse, parallel 3426 3427 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3428 @*/ 3429 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3430 { 3431 PetscErrorCode ierr; 3432 PetscMPIInt size; 3433 3434 PetscFunctionBegin; 3435 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3436 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3438 if (size > 1) { 3439 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3440 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3441 } else { 3442 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3443 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3444 } 3445 PetscFunctionReturn(0); 3446 } 3447 3448 #undef __FUNCT__ 3449 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3450 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3451 { 3452 Mat mat; 3453 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3454 PetscErrorCode ierr; 3455 PetscInt len=0; 3456 3457 PetscFunctionBegin; 3458 *newmat = 0; 3459 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3460 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3461 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3462 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3463 3464 mat->factortype = matin->factortype; 3465 mat->preallocated = PETSC_TRUE; 3466 mat->assembled = PETSC_TRUE; 3467 mat->insertmode = NOT_SET_VALUES; 3468 3469 a = (Mat_MPIBAIJ*)mat->data; 3470 mat->rmap->bs = matin->rmap->bs; 3471 a->bs2 = oldmat->bs2; 3472 a->mbs = oldmat->mbs; 3473 a->nbs = oldmat->nbs; 3474 a->Mbs = oldmat->Mbs; 3475 a->Nbs = oldmat->Nbs; 3476 3477 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3478 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3479 3480 a->size = oldmat->size; 3481 a->rank = oldmat->rank; 3482 a->donotstash = oldmat->donotstash; 3483 a->roworiented = oldmat->roworiented; 3484 a->rowindices = 0; 3485 a->rowvalues = 0; 3486 a->getrowactive = PETSC_FALSE; 3487 a->barray = 0; 3488 a->rstartbs = oldmat->rstartbs; 3489 a->rendbs = oldmat->rendbs; 3490 a->cstartbs = oldmat->cstartbs; 3491 a->cendbs = oldmat->cendbs; 3492 3493 /* hash table stuff */ 3494 a->ht = 0; 3495 a->hd = 0; 3496 a->ht_size = 0; 3497 a->ht_flag = oldmat->ht_flag; 3498 a->ht_fact = oldmat->ht_fact; 3499 a->ht_total_ct = 0; 3500 a->ht_insert_ct = 0; 3501 3502 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3503 if (oldmat->colmap) { 3504 #if defined(PETSC_USE_CTABLE) 3505 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3506 #else 3507 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3508 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3509 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3510 #endif 3511 } else a->colmap = 0; 3512 3513 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3514 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3515 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3516 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3517 } else a->garray = 0; 3518 3519 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3520 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3521 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3522 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3523 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3524 3525 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3526 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3527 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3528 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3529 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3530 *newmat = mat; 3531 PetscFunctionReturn(0); 3532 } 3533 3534 #undef __FUNCT__ 3535 #define __FUNCT__ "MatLoad_MPIBAIJ" 3536 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3537 { 3538 PetscErrorCode ierr; 3539 int fd; 3540 PetscInt i,nz,j,rstart,rend; 3541 PetscScalar *vals,*buf; 3542 MPI_Comm comm; 3543 MPI_Status status; 3544 PetscMPIInt rank,size,maxnz; 3545 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3546 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3547 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3548 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3549 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3550 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3551 3552 PetscFunctionBegin; 3553 /* force binary viewer to load .info file if it has not yet done so */ 3554 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3555 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3556 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3557 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3559 if (bs < 0) bs = 1; 3560 3561 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3562 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3563 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3564 if (!rank) { 3565 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3566 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3567 } 3568 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3569 M = header[1]; N = header[2]; 3570 3571 /* If global sizes are set, check if they are consistent with that given in the file */ 3572 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 3573 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 3574 3575 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3576 3577 /* 3578 This code adds extra rows to make sure the number of rows is 3579 divisible by 
the blocksize 3580 */ 3581 Mbs = M/bs; 3582 extra_rows = bs - M + bs*Mbs; 3583 if (extra_rows == bs) extra_rows = 0; 3584 else Mbs++; 3585 if (extra_rows && !rank) { 3586 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3587 } 3588 3589 /* determine ownership of all rows */ 3590 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3591 mbs = Mbs/size + ((Mbs % size) > rank); 3592 m = mbs*bs; 3593 } else { /* User set */ 3594 m = newmat->rmap->n; 3595 mbs = m/bs; 3596 } 3597 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3598 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3599 3600 /* process 0 needs enough room for process with most rows */ 3601 if (!rank) { 3602 mmax = rowners[1]; 3603 for (i=2; i<=size; i++) { 3604 mmax = PetscMax(mmax,rowners[i]); 3605 } 3606 mmax*=bs; 3607 } else mmax = -1; /* unused, but compiler warns anyway */ 3608 3609 rowners[0] = 0; 3610 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3611 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3612 rstart = rowners[rank]; 3613 rend = rowners[rank+1]; 3614 3615 /* distribute row lengths to all processors */ 3616 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3617 if (!rank) { 3618 mend = m; 3619 if (size == 1) mend = mend - extra_rows; 3620 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3621 for (j=mend; j<m; j++) locrowlens[j] = 1; 3622 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3623 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3624 for (j=0; j<m; j++) { 3625 procsnz[0] += locrowlens[j]; 3626 } 3627 for (i=1; i<size; i++) { 3628 mend = browners[i+1] - browners[i]; 3629 if (i == size-1) mend = mend - extra_rows; 3630 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3631 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3632 /* calculate the number of nonzeros on each processor */ 3633 for (j=0; j<browners[i+1]-browners[i]; j++) { 3634 procsnz[i] += rowlengths[j]; 3635 } 3636 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3637 } 3638 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3639 } else { 3640 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3641 } 3642 3643 if (!rank) { 3644 /* determine max buffer needed and allocate it */ 3645 maxnz = procsnz[0]; 3646 for (i=1; i<size; i++) { 3647 maxnz = PetscMax(maxnz,procsnz[i]); 3648 } 3649 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3650 3651 /* read in my part of the matrix column indices */ 3652 nz = procsnz[0]; 3653 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3654 mycols = ibuf; 3655 if (size == 1) nz -= extra_rows; 3656 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3657 if (size == 1) { 3658 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3659 } 3660 3661 /* read in every ones (except the last) and ship off */ 3662 for (i=1; i<size-1; i++) { 3663 nz = procsnz[i]; 3664 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3665 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3666 } 3667 /* read in the stuff for the last proc */ 3668 if (size != 1) { 3669 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3670 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3671 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3672 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3673 } 3674 ierr = PetscFree(cols);CHKERRQ(ierr); 3675 } else { 3676 /* determine 
buffer space needed for message */ 3677 nz = 0; 3678 for (i=0; i<m; i++) { 3679 nz += locrowlens[i]; 3680 } 3681 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3682 mycols = ibuf; 3683 /* receive message of column indices*/ 3684 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3685 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3686 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3687 } 3688 3689 /* loop over local rows, determining number of off diagonal entries */ 3690 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3691 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3692 rowcount = 0; nzcount = 0; 3693 for (i=0; i<mbs; i++) { 3694 dcount = 0; 3695 odcount = 0; 3696 for (j=0; j<bs; j++) { 3697 kmax = locrowlens[rowcount]; 3698 for (k=0; k<kmax; k++) { 3699 tmp = mycols[nzcount++]/bs; 3700 if (!mask[tmp]) { 3701 mask[tmp] = 1; 3702 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3703 else masked1[dcount++] = tmp; 3704 } 3705 } 3706 rowcount++; 3707 } 3708 3709 dlens[i] = dcount; 3710 odlens[i] = odcount; 3711 3712 /* zero out the mask elements we set */ 3713 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3714 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3715 } 3716 3717 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3718 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3719 3720 if (!rank) { 3721 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3722 /* read in my part of the matrix numerical values */ 3723 nz = procsnz[0]; 3724 vals = buf; 3725 mycols = ibuf; 3726 if (size == 1) nz -= extra_rows; 3727 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3728 if (size == 1) { 3729 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3730 } 3731 3732 /* insert into matrix */ 3733 jj = rstart*bs; 3734 for (i=0; i<m; i++) { 3735 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3736 mycols += locrowlens[i]; 3737 vals += locrowlens[i]; 3738 jj++; 3739 } 3740 /* read in other processors (except the last one) and ship out */ 3741 for (i=1; i<size-1; i++) { 3742 nz = procsnz[i]; 3743 vals = buf; 3744 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3745 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3746 } 3747 /* the last proc */ 3748 if (size != 1) { 3749 nz = procsnz[i] - extra_rows; 3750 vals = buf; 3751 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3752 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3753 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3754 } 3755 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3756 } else { 3757 /* receive numeric values */ 3758 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3759 3760 /* receive message of values*/ 3761 vals = buf; 3762 mycols = ibuf; 3763 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3764 3765 /* insert into matrix */ 3766 jj = rstart*bs; 3767 for (i=0; i<m; i++) { 3768 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3769 mycols += locrowlens[i]; 3770 vals += locrowlens[i]; 3771 jj++; 3772 } 3773 } 3774 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3775 ierr = PetscFree(buf);CHKERRQ(ierr); 3776 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3777 ierr = 
PetscFree2(rowners,browners);CHKERRQ(ierr); 3778 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3779 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3780 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3781 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3787 /*@ 3788 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3789 3790 Input Parameters: 3791 + mat - the matrix 3792 - fact - the hash table factor 3793 3794 Not Collective, each process can use a different factor 3795 3796 Level: advanced 3797 3798 Notes: 3799 This can also be set by the command line option: -mat_use_hash_table <fact> 3800 3801 .keywords: matrix, hashtable, factor, HT 3802 3803 .seealso: MatSetOption() 3804 @*/ 3805 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3806 { 3807 PetscErrorCode ierr; 3808 3809 PetscFunctionBegin; 3810 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3811 PetscFunctionReturn(0); 3812 } 3813 3814 #undef __FUNCT__ 3815 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3816 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3817 { 3818 Mat_MPIBAIJ *baij; 3819 3820 PetscFunctionBegin; 3821 baij = (Mat_MPIBAIJ*)mat->data; 3822 baij->ht_fact = fact; 3823 PetscFunctionReturn(0); 3824 } 3825 3826 #undef __FUNCT__ 3827 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3828 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3829 { 3830 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3831 3832 PetscFunctionBegin; 3833 if (Ad) *Ad = a->A; 3834 if (Ao) *Ao = a->B; 3835 if (colmap) *colmap = a->garray; 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /* 3840 Special version for direct calls from Fortran (to eliminate two function call overheads) 3841 */ 3842 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3843 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3844 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3845 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3846 #endif 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3850 /*@C 3851 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3852 3853 Collective on Mat 3854 3855 Input Parameters: 3856 + mat - the matrix 3857 . min - number of input rows 3858 . im - input rows 3859 . nin - number of input columns 3860 . in - input columns 3861 . v - numerical values input 3862 - addvin - INSERT_VALUES or ADD_VALUES 3863 3864 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3865 3866 Level: advanced 3867 3868 .seealso: MatSetValuesBlocked() 3869 @*/ 3870 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3871 { 3872 /* convert input arguments to C version */ 3873 Mat mat = *matin; 3874 PetscInt m = *min, n = *nin; 3875 InsertMode addv = *addvin; 3876 3877 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3878 const MatScalar *value; 3879 MatScalar *barray = baij->barray; 3880 PetscBool roworiented = baij->roworiented; 3881 PetscErrorCode ierr; 3882 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3883 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3884 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3885 3886 PetscFunctionBegin; 3887 /* tasks normally handled by MatSetValuesBlocked() */ 3888 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3889 #if defined(PETSC_USE_DEBUG) 3890 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3891 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3892 #endif 3893 if (mat->assembled) { 3894 mat->was_assembled = PETSC_TRUE; 3895 mat->assembled = PETSC_FALSE; 3896 } 3897 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3898 3899 3900 if (!barray) { 3901 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3902 baij->barray = barray; 3903 } 3904 3905 if (roworiented) stepval = (n-1)*bs; 3906 else stepval = (m-1)*bs; 3907 3908 for (i=0; i<m; i++) { 3909 if (im[i] < 0) continue; 3910 #if defined(PETSC_USE_DEBUG) 3911 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3912 #endif 3913 if (im[i] >= rstart && im[i] < rend) { 3914 row = im[i] - rstart; 3915 for (j=0; j<n; j++) { 3916 /* If NumCol = 1 then a copy is not required */ 3917 if ((roworiented) && (n == 1)) { 3918 barray = (MatScalar*)v + i*bs2; 3919 } else if ((!roworiented) && (m == 1)) { 3920 barray = (MatScalar*)v + j*bs2; 3921 } else { /* Here a copy is required */ 3922 if (roworiented) { 3923 value = v + i*(stepval+bs)*bs + j*bs; 3924 } else { 3925 value = v + j*(stepval+bs)*bs + i*bs; 3926 } 3927 for (ii=0; ii<bs; ii++,value+=stepval) { 3928 for (jj=0; jj<bs; jj++) { 3929 *barray++ = *value++; 3930 } 3931 } 3932 barray -=bs2; 3933 } 3934 3935 if (in[j] >= cstart && in[j] < cend) { 3936 col = in[j] - cstart; 3937 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3938 } else if (in[j] < 0) continue; 3939 #if defined(PETSC_USE_DEBUG) 3940 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3941 #endif 3942 else { 3943 if (mat->was_assembled) { 3944 if (!baij->colmap) { 3945 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3946 } 3947 3948 #if defined(PETSC_USE_DEBUG) 3949 #if defined(PETSC_USE_CTABLE) 3950 { PetscInt data; 3951 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3952 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3953 } 3954 #else 3955 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3956 #endif 3957 #endif 3958 #if defined(PETSC_USE_CTABLE) 3959 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3960 col = (col - 1)/bs; 3961 #else 3962 col = (baij->colmap[in[j]] - 
1)/bs; 3963 #endif 3964 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3965 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3966 col = in[j]; 3967 } 3968 } else col = in[j]; 3969 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3970 } 3971 } 3972 } else { 3973 if (!baij->donotstash) { 3974 if (roworiented) { 3975 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3976 } else { 3977 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3978 } 3979 } 3980 } 3981 } 3982 3983 /* task normally handled by MatSetValuesBlocked() */ 3984 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3985 PetscFunctionReturn(0); 3986 } 3987 3988 #undef __FUNCT__ 3989 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3990 /*@ 3991 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard 3992 CSR format. 3993 3994 Collective on MPI_Comm 3995 3996 Input Parameters: 3997 + comm - MPI communicator 3998 . bs - the block size, only a block size of 1 is supported 3999 . m - number of local rows (Cannot be PETSC_DECIDE) 4000 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given). 4001 This value should be the same as the local size used in creating the 4002 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m. 4003 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4004 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4005 . i - row indices 4006 . j - column indices 4007 - a - matrix values 4008 4009 Output Parameter: 4010 . mat - the matrix 4011 4012 Level: intermediate 4013 4014 Notes: 4015 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4016 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4017 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4018 4019 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 4020 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 4021 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 4022 with column-major ordering within blocks. 4023 4024 The i and j indices are 0 based, and the i indices are offsets into the local j array.
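   As a small, purely illustrative sketch (the numbers are invented, and it is written as if run on a single MPI
   process that owns both rows of a 2 x 2 matrix with bs = 1):
.vb
     PetscInt    i[] = {0,2,3};           // row offsets into j: row 0 has 2 entries, row 1 has 1
     PetscInt    j[] = {0,1,1};           // column indices, sorted within each row
     PetscScalar a[] = {2.0,-1.0,4.0};
     Mat         A;
     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve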
4025 4026 .keywords: matrix, aij, compressed row, sparse, parallel 4027 4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4029 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4030 @*/ 4031 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4032 { 4033 PetscErrorCode ierr; 4034 4035 PetscFunctionBegin; 4036 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4037 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4038 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4039 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4040 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4041 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4042 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4043 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4044 PetscFunctionReturn(0); 4045 } 4046 4047 #undef __FUNCT__ 4048 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 4049 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4050 { 4051 PetscErrorCode ierr; 4052 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 4053 PetscInt *indx; 4054 PetscScalar *values; 4055 4056 PetscFunctionBegin; 4057 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4058 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4059 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 4060 PetscInt *dnz,*onz,sum,mbs,Nbs; 4061 PetscInt *bindx,rmax=a->rmax,j; 4062 4063 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4064 mbs = m/bs; Nbs = N/cbs; 4065 if (n == PETSC_DECIDE) { 4066 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 4067 } 4068 /* Check sum(n) = Nbs */ 4069 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4070 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs); 4071 4072 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4073 rstart -= mbs; 4074 4075 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 4076 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 4077 for (i=0; i<mbs; i++) { 4078 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 4079 nnz = nnz/bs; 4080 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 4081 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 4082 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 4083 } 4084 ierr = PetscFree(bindx);CHKERRQ(ierr); 4085 4086 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4087 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4088 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4089 ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr); 4090 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 4091 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4092 } 4093 4094 /* numeric phase */ 4095 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4096 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4097 4098 for (i=0; i<m; i++) { 4099 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4100 Ii = i + rstart; 4101 ierr =
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4102 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4103 } 4104 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4105 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4106 PetscFunctionReturn(0); 4107 } 4108