#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
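/*
   Illustrative sketch (not part of the implementation): to translate a global
   (point) column index gcol into the local column used by the off-diagonal
   block B, the lookup mirrors what MatSetValues_MPIBAIJ() does below; the
   table stores "local block index*bs + 1" so that 0 can mean "column not
   present".

     PetscInt gbcol = gcol/bs, lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = baij->colmap[gbcol] - 1;
   #endif
     if (lcol >= 0) lcol += gcol % bs;    -- point column within B
*/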
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
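/*
   Illustrative sketch (assuming bs = 2): the caller passes one bs x bs block
   of values; within each bs2 chunk of ->a the block is stored column-major,
   so the row-oriented input

       v = { a00, a01,
             a10, a11 }

   ends up in memory as { a00, a10, a01, a11 }.  That is what the strided
   loops above (jj = ii; jj < bs2; jj += bs) accomplish for row-oriented
   input, while column-oriented input can be copied contiguously.
*/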
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
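/*
   HASH() is multiplicative hashing with the golden-ratio constant: the key is
   multiplied by ~0.618..., the fractional part is kept, and that fraction is
   scaled to the table size.  A worked example (rounded values, for
   illustration only): with size = 97 and key = 1234,

       key*HASH_KEY    = 762.654...
       fractional part = 0.654...
       slot            = (PetscInt)(97*0.654...) = 63

   so keys that differ even slightly tend to land in well-spread slots.
*/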
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
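/*
  Sketch of the lookup the *_HT SetValues routines perform against this table
  (illustrative only, with simplified probing): each occupied slot holds
  key = row*Nbs + col + 1 and a pointer to the block's storage; collisions are
  resolved by linear probing with wrap-around.

      key = row*Nbs + col + 1;
      idx = HASH(ht_size,key,tmp);
      while (HT[idx] != key) idx = (idx + 1) % ht_size;   -- assumes key exists
      block = HD[idx];                                    -- bs*bs values
*/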
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
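/*
   Typical caller-side sequence the two assembly routines above support
   (illustrative only; A, row, ncols, cols, vals are placeholders):

       ierr = MatSetValues(A,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);   -- may touch off-process rows
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);              -- starts scattering the stash
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);                -- inserts received entries locally

   Off-process entries accumulated in mat->stash / mat->bstash during
   MatSetValues() are communicated in MatAssemblyBegin_MPIBAIJ() and drained by
   the loops in MatAssemblyEnd_MPIBAIJ() above.
*/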
extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr  = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr  = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
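/*
   The multiply routines below all follow the same split (a sketch of the
   idea, with A_d the local diagonal block stored in a->A and B_o the
   off-diagonal block stored in a->B):

       y = A*x = A_d * x_local + B_o * x_ghost

   The VecScatterBegin()/VecScatterEnd() pair gathers the ghost entries of x
   into a->lvec while the purely local product A_d*x_local proceeds, so
   communication overlaps computation.
*/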
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
1538 */ 1539 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1540 PetscInt max = 1,mbs = mat->mbs,tmp; 1541 for (i=0; i<mbs; i++) { 1542 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1543 if (max < tmp) max = tmp; 1544 } 1545 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1546 } 1547 lrow = row - brstart; 1548 1549 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1550 if (!v) {pvA = 0; pvB = 0;} 1551 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1552 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1553 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1554 nztot = nzA + nzB; 1555 1556 cmap = mat->garray; 1557 if (v || idx) { 1558 if (nztot) { 1559 /* Sort by increasing column numbers, assuming A and B already sorted */ 1560 PetscInt imark = -1; 1561 if (v) { 1562 *v = v_p = mat->rowvalues; 1563 for (i=0; i<nzB; i++) { 1564 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1565 else break; 1566 } 1567 imark = i; 1568 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1569 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1570 } 1571 if (idx) { 1572 *idx = idx_p = mat->rowindices; 1573 if (imark > -1) { 1574 for (i=0; i<imark; i++) { 1575 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1576 } 1577 } else { 1578 for (i=0; i<nzB; i++) { 1579 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1580 else break; 1581 } 1582 imark = i; 1583 } 1584 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1585 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1586 } 1587 } else { 1588 if (idx) *idx = 0; 1589 if (v) *v = 0; 1590 } 1591 } 1592 *nz = nztot; 1593 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1594 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1600 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1601 { 1602 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1603 1604 PetscFunctionBegin; 1605 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1606 baij->getrowactive = PETSC_FALSE; 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1612 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1613 { 1614 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1619 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1625 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1626 { 1627 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1628 Mat A = a->A,B = a->B; 1629 PetscErrorCode ierr; 1630 PetscReal isend[5],irecv[5]; 1631 1632 PetscFunctionBegin; 1633 info->block_size = (PetscReal)matin->rmap->bs; 1634 1635 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1636 1637 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1638 isend[3] = info->memory; isend[4] = info->mallocs; 1639 1640 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1643 isend[3] += info->memory; 
isend[4] += info->mallocs; 1644 1645 if (flag == MAT_LOCAL) { 1646 info->nz_used = isend[0]; 1647 info->nz_allocated = isend[1]; 1648 info->nz_unneeded = isend[2]; 1649 info->memory = isend[3]; 1650 info->mallocs = isend[4]; 1651 } else if (flag == MAT_GLOBAL_MAX) { 1652 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1653 1654 info->nz_used = irecv[0]; 1655 info->nz_allocated = irecv[1]; 1656 info->nz_unneeded = irecv[2]; 1657 info->memory = irecv[3]; 1658 info->mallocs = irecv[4]; 1659 } else if (flag == MAT_GLOBAL_SUM) { 1660 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1661 1662 info->nz_used = irecv[0]; 1663 info->nz_allocated = irecv[1]; 1664 info->nz_unneeded = irecv[2]; 1665 info->memory = irecv[3]; 1666 info->mallocs = irecv[4]; 1667 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1668 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1669 info->fill_ratio_needed = 0; 1670 info->factor_mallocs = 0; 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1676 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1677 { 1678 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1679 PetscErrorCode ierr; 1680 1681 PetscFunctionBegin; 1682 switch (op) { 1683 case MAT_NEW_NONZERO_LOCATIONS: 1684 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1685 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1686 case MAT_KEEP_NONZERO_PATTERN: 1687 case MAT_NEW_NONZERO_LOCATION_ERR: 1688 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1689 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1690 break; 1691 case MAT_ROW_ORIENTED: 1692 a->roworiented = flg; 1693 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_NEW_DIAGONALS: 1698 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1699 break; 1700 case MAT_IGNORE_OFF_PROC_ENTRIES: 1701 a->donotstash = flg; 1702 break; 1703 case MAT_USE_HASH_TABLE: 1704 a->ht_flag = flg; 1705 break; 1706 case MAT_SYMMETRIC: 1707 case MAT_STRUCTURALLY_SYMMETRIC: 1708 case MAT_HERMITIAN: 1709 case MAT_SYMMETRY_ETERNAL: 1710 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1711 break; 1712 default: 1713 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1720 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1721 { 1722 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1723 Mat_SeqBAIJ *Aloc; 1724 Mat B; 1725 PetscErrorCode ierr; 1726 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1727 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1728 MatScalar *a; 1729 1730 PetscFunctionBegin; 1731 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1732 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1733 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1734 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1735 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1736 /* Do not know preallocation information, but must set block size */ 1737 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1738 } else { 
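    /* MAT_REUSE_MATRIX with a distinct output matrix: reuse the caller-provided, already preallocated matrix */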
1739 B = *matout; 1740 } 1741 1742 /* copy over the A part */ 1743 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1744 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1745 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1746 1747 for (i=0; i<mbs; i++) { 1748 rvals[0] = bs*(baij->rstartbs + i); 1749 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1750 for (j=ai[i]; j<ai[i+1]; j++) { 1751 col = (baij->cstartbs+aj[j])*bs; 1752 for (k=0; k<bs; k++) { 1753 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1754 1755 col++; a += bs; 1756 } 1757 } 1758 } 1759 /* copy over the B part */ 1760 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1761 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1762 for (i=0; i<mbs; i++) { 1763 rvals[0] = bs*(baij->rstartbs + i); 1764 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1765 for (j=ai[i]; j<ai[i+1]; j++) { 1766 col = baij->garray[aj[j]]*bs; 1767 for (k=0; k<bs; k++) { 1768 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1769 col++; 1770 a += bs; 1771 } 1772 } 1773 } 1774 ierr = PetscFree(rvals);CHKERRQ(ierr); 1775 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1776 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1777 1778 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1779 else { 1780 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1787 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1788 { 1789 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1790 Mat a = baij->A,b = baij->B; 1791 PetscErrorCode ierr; 1792 PetscInt s1,s2,s3; 1793 1794 PetscFunctionBegin; 1795 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1796 if (rr) { 1797 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1798 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1799 /* Overlap communication with computation. 
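     The scatter of rr into lvec is started here; the left scaling of the off-diagonal block and
     the scaling of the diagonal block proceed while the messages are in flight, and the scatter
     is completed below just before the off-diagonal block is right scaled.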
*/ 1800 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1801 } 1802 if (ll) { 1803 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1804 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1805 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1806 } 1807 /* scale the diagonal block */ 1808 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1809 1810 if (rr) { 1811 /* Do a scatter end and then right scale the off-diagonal block */ 1812 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1813 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1820 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1821 { 1822 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1823 PetscInt *owners = A->rmap->range; 1824 PetscInt n = A->rmap->n; 1825 PetscSF sf; 1826 PetscInt *lrows; 1827 PetscSFNode *rrows; 1828 PetscInt r, p = 0, len = 0; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 /* Create SF where leaves are input rows and roots are owned rows */ 1833 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1834 for (r = 0; r < n; ++r) lrows[r] = -1; 1835 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1836 for (r = 0; r < N; ++r) { 1837 const PetscInt idx = rows[r]; 1838 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1839 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1840 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1841 } 1842 if (A->nooffproczerorows) { 1843 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1844 lrows[len++] = idx - owners[p]; 1845 } else { 1846 rrows[r].rank = p; 1847 rrows[r].index = rows[r] - owners[p]; 1848 } 1849 } 1850 if (!A->nooffproczerorows) { 1851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1853 /* Collect flags for rows to be zeroed */ 1854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1857 /* Compress and put in row numbers */ 1858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1859 } 1860 /* fix right hand side if needed */ 1861 if (x && b) { 1862 const PetscScalar *xx; 1863 PetscScalar *bb; 1864 1865 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1866 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1867 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1868 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1869 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1870 } 1871 1872 /* actually zap the local rows */ 1873 /* 1874 Zero the required rows. If the "diagonal block" of the matrix 1875 is square and the user wishes to set the diagonal we use separate 1876 code so that MatSetValues() is not called for each diagonal allocating 1877 new memory, thus calling lots of mallocs and slowing things down. 
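       When the diagonal block is rectangular the diagonal entries must instead be inserted with
       MatSetValues(), which is only possible if new nonzero locations are allowed.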
1878 1879 */ 1880 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1881 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1882 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1884 } else if (diag != 0.0) { 1885 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1886 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1887 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1888 for (r = 0; r < len; ++r) { 1889 const PetscInt row = lrows[r] + A->rmap->rstart; 1890 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1891 } 1892 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1893 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1894 } else { 1895 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1896 } 1897 ierr = PetscFree(lrows);CHKERRQ(ierr); 1898 1899 /* only change matrix nonzero state if pattern was allowed to be changed */ 1900 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1901 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1902 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1909 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1910 { 1911 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 PetscMPIInt n = A->rmap->n; 1914 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1915 PetscInt *lrows,*owners = A->rmap->range; 1916 PetscSFNode *rrows; 1917 PetscSF sf; 1918 const PetscScalar *xx; 1919 PetscScalar *bb,*mask; 1920 Vec xmask,lmask; 1921 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1922 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1923 PetscScalar *aa; 1924 1925 PetscFunctionBegin; 1926 /* Create SF where leaves are input rows and roots are owned rows */ 1927 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1928 for (r = 0; r < n; ++r) lrows[r] = -1; 1929 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1930 for (r = 0; r < N; ++r) { 1931 const PetscInt idx = rows[r]; 1932 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1933 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1934 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1935 } 1936 rrows[r].rank = p; 1937 rrows[r].index = rows[r] - owners[p]; 1938 } 1939 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1940 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1941 /* Collect flags for rows to be zeroed */ 1942 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1943 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1944 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1945 /* Compress and put in row numbers */ 1946 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1947 /* zero diagonal part of matrix */ 1948 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1949 /* 
handle off diagonal part of matrix */ 1950 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1951 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1952 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1953 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1954 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1955 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1958 if (x) { 1959 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1961 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1962 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1963 } 1964 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1965 /* remove zeroed rows of off diagonal matrix */ 1966 for (i = 0; i < len; ++i) { 1967 row = lrows[i]; 1968 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1969 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1970 for (k = 0; k < count; ++k) { 1971 aa[0] = 0.0; 1972 aa += bs; 1973 } 1974 } 1975 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1976 for (i = 0; i < l->B->rmap->N; ++i) { 1977 row = i/bs; 1978 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1979 for (k = 0; k < bs; ++k) { 1980 col = bs*baij->j[j] + k; 1981 if (PetscAbsScalar(mask[col])) { 1982 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1983 if (b) bb[i] -= aa[0]*xx[col]; 1984 aa[0] = 0.0; 1985 } 1986 } 1987 } 1988 } 1989 if (x) { 1990 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1991 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1992 } 1993 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1994 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1995 ierr = PetscFree(lrows);CHKERRQ(ierr); 1996 1997 /* only change matrix nonzero state if pattern was allowed to be changed */ 1998 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1999 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 2000 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2001 } 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 2007 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 2008 { 2009 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatEqual_MPIBAIJ" 2021 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 a = matA->A; b = matA->B; 2030 c = matB->A; d = matB->B; 2031 2032 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2033 if (flg) { 2034 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2035 } 2036 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "MatCopy_MPIBAIJ" 2042 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 2043 { 2044 PetscErrorCode ierr; 2045 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
2046 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2047 2048 PetscFunctionBegin; 2049 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2050 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2051 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2052 } else { 2053 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2054 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatSetUp_MPIBAIJ" 2061 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 2062 { 2063 PetscErrorCode ierr; 2064 2065 PetscFunctionBegin; 2066 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 2072 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2073 { 2074 PetscErrorCode ierr; 2075 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 2076 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2077 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2078 2079 PetscFunctionBegin; 2080 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatAXPY_MPIBAIJ" 2086 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2087 { 2088 PetscErrorCode ierr; 2089 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 2090 PetscBLASInt bnz,one=1; 2091 Mat_SeqBAIJ *x,*y; 2092 2093 PetscFunctionBegin; 2094 if (str == SAME_NONZERO_PATTERN) { 2095 PetscScalar alpha = a; 2096 x = (Mat_SeqBAIJ*)xx->A->data; 2097 y = (Mat_SeqBAIJ*)yy->A->data; 2098 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2099 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2100 x = (Mat_SeqBAIJ*)xx->B->data; 2101 y = (Mat_SeqBAIJ*)yy->B->data; 2102 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2103 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2104 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2105 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2106 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2107 } else { 2108 Mat B; 2109 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2110 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2111 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2112 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2113 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2114 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2115 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2116 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2117 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2118 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2119 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2120 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
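       The union nonzero pattern has been preallocated above through nnz_d/nnz_o, so the
       entry-by-entry copy at least proceeds without mallocs.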
*/ 2121 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2122 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2123 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2124 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2125 } 2126 PetscFunctionReturn(0); 2127 } 2128 2129 #undef __FUNCT__ 2130 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2131 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2132 { 2133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2138 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2144 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2145 { 2146 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2151 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2157 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2158 { 2159 PetscErrorCode ierr; 2160 IS iscol_local; 2161 PetscInt csize; 2162 2163 PetscFunctionBegin; 2164 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2165 if (call == MAT_REUSE_MATRIX) { 2166 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2167 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2168 } else { 2169 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2170 } 2171 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2172 if (call == MAT_INITIAL_MATRIX) { 2173 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2174 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2181 /* 2182 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2183 in local and then by concatenating the local matrices the end result. 2184 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 2185 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2186 */ 2187 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2188 { 2189 PetscErrorCode ierr; 2190 PetscMPIInt rank,size; 2191 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2192 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2193 Mat M,Mreuse; 2194 MatScalar *vwork,*aa; 2195 MPI_Comm comm; 2196 IS isrow_new, iscol_new; 2197 PetscBool idflag,allrows, allcols; 2198 Mat_SeqBAIJ *aij; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2202 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2203 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2204 /* The compression and expansion should be avoided. 
Doesn't point 2205 out errors, might change the indices, hence buggey */ 2206 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2207 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2208 2209 /* Check for special case: each processor gets entire matrix columns */ 2210 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2211 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2212 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2213 else allcols = PETSC_FALSE; 2214 2215 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2216 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2217 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2218 else allrows = PETSC_FALSE; 2219 2220 if (call == MAT_REUSE_MATRIX) { 2221 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2222 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2223 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2224 } else { 2225 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2226 } 2227 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2228 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2229 /* 2230 m - number of local rows 2231 n - number of columns (same on all processors) 2232 rstart - first row in new global matrix generated 2233 */ 2234 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2235 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2236 m = m/bs; 2237 n = n/bs; 2238 2239 if (call == MAT_INITIAL_MATRIX) { 2240 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2241 ii = aij->i; 2242 jj = aij->j; 2243 2244 /* 2245 Determine the number of non-zeros in the diagonal and off-diagonal 2246 portions of the matrix in order to do correct preallocation 2247 */ 2248 2249 /* first get start and end of "diagonal" columns */ 2250 if (csize == PETSC_DECIDE) { 2251 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2252 if (mglobal == n*bs) { /* square matrix */ 2253 nlocal = m; 2254 } else { 2255 nlocal = n/size + ((n % size) > rank); 2256 } 2257 } else { 2258 nlocal = csize/bs; 2259 } 2260 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2261 rstart = rend - nlocal; 2262 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2263 2264 /* next, compute all the lengths */ 2265 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2266 for (i=0; i<m; i++) { 2267 jend = ii[i+1] - ii[i]; 2268 olen = 0; 2269 dlen = 0; 2270 for (j=0; j<jend; j++) { 2271 if (*jj < rstart || *jj >= rend) olen++; 2272 else dlen++; 2273 jj++; 2274 } 2275 olens[i] = olen; 2276 dlens[i] = dlen; 2277 } 2278 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2279 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2280 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2281 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2282 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2283 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2284 } else { 2285 PetscInt ml,nl; 2286 2287 M = *newmat; 2288 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2289 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 2290 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2291 /* 2292 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2293 rather than the slower MatSetValues(). 2294 */ 2295 M->was_assembled = PETSC_TRUE; 2296 M->assembled = PETSC_FALSE; 2297 } 2298 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2299 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2300 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2301 ii = aij->i; 2302 jj = aij->j; 2303 aa = aij->a; 2304 for (i=0; i<m; i++) { 2305 row = rstart/bs + i; 2306 nz = ii[i+1] - ii[i]; 2307 cwork = jj; jj += nz; 2308 vwork = aa; aa += nz*bs*bs; 2309 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2310 } 2311 2312 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2314 *newmat = M; 2315 2316 /* save submatrix used in processor for next request */ 2317 if (call == MAT_INITIAL_MATRIX) { 2318 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2319 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2320 } 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatPermute_MPIBAIJ" 2326 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2327 { 2328 MPI_Comm comm,pcomm; 2329 PetscInt clocal_size,nrows; 2330 const PetscInt *rows; 2331 PetscMPIInt size; 2332 IS crowp,lcolp; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2337 /* make a collective version of 'rowp' */ 2338 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2339 if (pcomm==comm) { 2340 crowp = rowp; 2341 } else { 2342 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2343 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2344 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2345 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2346 } 2347 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2348 /* make a local version of 'colp' */ 2349 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2350 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2351 if (size==1) { 2352 lcolp = colp; 2353 } else { 2354 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2355 } 2356 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2357 /* now we just get the submatrix */ 2358 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2359 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2360 /* clean up */ 2361 if (pcomm!=comm) { 2362 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2363 } 2364 if (size>1) { 2365 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2366 } 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2372 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2373 { 2374 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2375 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2376 2377 PetscFunctionBegin; 2378 if (nghosts) *nghosts = B->nbs; 2379 if (ghosts) *ghosts = baij->garray; 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2385 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2386 { 2387 Mat B; 2388 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2389 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2390 Mat_SeqAIJ *b; 2391 PetscErrorCode ierr; 2392 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2393 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2394 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2395 2396 PetscFunctionBegin; 2397 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2398 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2399 2400 /* ---------------------------------------------------------------- 2401 Tell every processor the number of nonzeros per row 2402 */ 2403 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2404 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2405 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2406 } 2407 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2408 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2409 displs = recvcounts + size; 2410 for (i=0; i<size; i++) { 2411 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2412 displs[i] = A->rmap->range[i]/bs; 2413 } 2414 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2415 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2416 #else 2417 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2418 #endif 2419 /* --------------------------------------------------------------- 2420 Create the sequential matrix of the same type as the local block diagonal 2421 */ 2422 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2423 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2424 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2425 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2426 b = (Mat_SeqAIJ*)B->data; 2427 2428 /*-------------------------------------------------------------------- 2429 Copy my part of matrix column indices over 2430 */ 2431 sendcount = ad->nz + bd->nz; 2432 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2433 a_jsendbuf = ad->j; 2434 b_jsendbuf = bd->j; 2435 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2436 cnt = 0; 2437 for (i=0; i<n; i++) { 2438 2439 /* put in lower diagonal portion */ 2440 m = bd->i[i+1] - bd->i[i]; 2441 while (m > 0) { 2442 /* is it above diagonal (in bd (compressed) numbering) */ 2443 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2444 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2445 m--; 2446 } 2447 2448 /* put in diagonal portion */ 2449 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2450 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2451 } 2452 2453 /* put in upper diagonal portion */ 2454 while (m-- > 0) { 2455 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2456 } 2457 } 2458 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2459 2460 /*-------------------------------------------------------------------- 2461 Gather all column indices to all processors 2462 */ 2463 for (i=0; i<size; i++) { 2464 recvcounts[i] = 0; 2465 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2466 recvcounts[i] += lens[j]; 2467 } 2468 } 2469 displs[0] = 0; 2470 for (i=1; i<size; i++) { 2471 displs[i] = displs[i-1] + recvcounts[i-1]; 2472 } 2473 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2474 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2475 #else 2476 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2477 #endif 2478 /*-------------------------------------------------------------------- 2479 Assemble the matrix into useable form (note numerical values not yet set) 2480 */ 2481 /* set the b->ilen (length of each row) values */ 2482 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2483 /* set the b->i indices */ 2484 b->i[0] = 0; 2485 for (i=1; i<=A->rmap->N/bs; i++) { 2486 b->i[i] = b->i[i-1] + lens[i-1]; 2487 } 2488 ierr = PetscFree(lens);CHKERRQ(ierr); 2489 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2491 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2492 2493 if (A->symmetric) { 2494 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2495 } else if (A->hermitian) { 2496 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2497 } else if (A->structurally_symmetric) { 2498 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2499 } 2500 *newmat = B; 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatSOR_MPIBAIJ" 2506 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2507 { 2508 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2509 PetscErrorCode ierr; 2510 Vec bb1 = 0; 2511 2512 PetscFunctionBegin; 2513 if (flag == SOR_APPLY_UPPER) { 2514 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2519 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2520 } 2521 2522 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2523 if (flag & SOR_ZERO_INITIAL_GUESS) { 2524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2525 its--; 2526 } 2527 2528 while (its--) { 2529 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2530 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2531 2532 /* update rhs: bb1 = bb - B*x */ 2533 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2534 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2535 2536 /* local sweep */ 2537 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2538 } 2539 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2540 if (flag & SOR_ZERO_INITIAL_GUESS) { 2541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2542 its--; 2543 } 2544 while (its--) { 2545 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2546 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2547 2548 /* update rhs: bb1 = bb - B*x */ 2549 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2550 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2551 2552 /* local sweep */ 2553 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2554 } 2555 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2556 if (flag & SOR_ZERO_INITIAL_GUESS) { 2557 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2558 its--; 2559 } 2560 while (its--) { 2561 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2562 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2563 2564 /* update rhs: bb1 = bb - B*x */ 2565 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2566 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2567 2568 /* local sweep */ 2569 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2570 } 2571 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2572 2573 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2579 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2580 { 2581 PetscErrorCode ierr; 2582 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2583 PetscInt N,i,*garray = aij->garray; 2584 PetscInt ib,jb,bs = A->rmap->bs; 2585 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2586 MatScalar *a_val = a_aij->a; 2587 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2588 MatScalar *b_val = b_aij->a; 2589 PetscReal *work; 2590 2591 PetscFunctionBegin; 2592 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2593 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2594 if (type == NORM_2) { 2595 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2596 for (jb=0; jb<bs; jb++) { 2597 for (ib=0; ib<bs; ib++) { 2598 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2599 a_val++; 2600 } 2601 } 2602 } 2603 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2604 for (jb=0; jb<bs; jb++) { 2605 for (ib=0; ib<bs; ib++) { 2606 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2607 b_val++; 2608 } 2609 } 2610 } 2611 } else if (type == NORM_1) { 2612 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2613 for (jb=0; jb<bs; jb++) { 2614 for (ib=0; ib<bs; ib++) { 2615 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2616 a_val++; 2617 } 2618 } 2619 } 2620 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2621 for (jb=0; jb<bs; jb++) { 2622 for (ib=0; ib<bs; ib++) { 2623 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2624 b_val++; 2625 } 2626 } 2627 } 2628 } else if (type == NORM_INFINITY) { 2629 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2630 for (jb=0; jb<bs; jb++) { 2631 for (ib=0; ib<bs; ib++) { 2632 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2633 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2634 a_val++; 2635 } 2636 } 2637 } 2638 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2639 for (jb=0; jb<bs; jb++) { 2640 for (ib=0; ib<bs; ib++) { 2641 int col = garray[b_aij->j[i]] * bs + jb; 2642 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2643 b_val++; 2644 } 2645 } 2646 } 2647 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2648 if (type == NORM_INFINITY) { 2649 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2650 } else { 2651 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2652 } 2653 ierr = PetscFree(work);CHKERRQ(ierr); 2654 if (type == NORM_2) { 2655 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2656 } 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2662 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2663 { 2664 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2665 PetscErrorCode ierr; 2666 2667 PetscFunctionBegin; 2668 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 #undef __FUNCT__ 2673 #define __FUNCT__ "MatShift_MPIBAIJ" 2674 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2675 { 2676 PetscErrorCode ierr; 2677 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2678 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2679 2680 PetscFunctionBegin; 2681 if (!Y->preallocated) { 2682 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2683 } else if (!aij->nz) { 2684 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2685 } 2686 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 /* -------------------------------------------------------------------*/ 2691 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2692 MatGetRow_MPIBAIJ, 2693 MatRestoreRow_MPIBAIJ, 2694 MatMult_MPIBAIJ, 2695 /* 4*/ MatMultAdd_MPIBAIJ, 2696 MatMultTranspose_MPIBAIJ, 2697 MatMultTransposeAdd_MPIBAIJ, 2698 0, 2699 0, 2700 0, 2701 /*10*/ 0, 2702 0, 2703 0, 2704 MatSOR_MPIBAIJ, 2705 MatTranspose_MPIBAIJ, 2706 /*15*/ MatGetInfo_MPIBAIJ, 2707 MatEqual_MPIBAIJ, 2708 MatGetDiagonal_MPIBAIJ, 2709 MatDiagonalScale_MPIBAIJ, 2710 MatNorm_MPIBAIJ, 2711 /*20*/ MatAssemblyBegin_MPIBAIJ, 2712 MatAssemblyEnd_MPIBAIJ, 2713 MatSetOption_MPIBAIJ, 2714 MatZeroEntries_MPIBAIJ, 2715 /*24*/ MatZeroRows_MPIBAIJ, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*29*/ MatSetUp_MPIBAIJ, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /*34*/ MatDuplicate_MPIBAIJ, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /*39*/ MatAXPY_MPIBAIJ, 2731 MatGetSubMatrices_MPIBAIJ, 2732 MatIncreaseOverlap_MPIBAIJ, 2733 MatGetValues_MPIBAIJ, 2734 MatCopy_MPIBAIJ, 2735 /*44*/ 0, 2736 MatScale_MPIBAIJ, 2737 MatShift_MPIBAIJ, 2738 0, 2739 MatZeroRowsColumns_MPIBAIJ, 2740 /*49*/ 0, 2741 0, 2742 0, 2743 0, 2744 0, 2745 /*54*/ MatFDColoringCreate_MPIXAIJ, 2746 0, 2747 MatSetUnfactored_MPIBAIJ, 2748 MatPermute_MPIBAIJ, 2749 MatSetValuesBlocked_MPIBAIJ, 2750 /*59*/ MatGetSubMatrix_MPIBAIJ, 2751 MatDestroy_MPIBAIJ, 2752 MatView_MPIBAIJ, 2753 0, 2754 0, 2755 /*64*/ 0, 2756 0, 2757 0, 2758 0, 2759 0, 2760 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2761 0, 2762 0, 2763 0, 2764 0, 2765 /*74*/ 0, 2766 MatFDColoringApply_BAIJ, 2767 0, 2768 0, 2769 0, 2770 /*79*/ 0, 2771 0, 2772 0, 2773 0, 2774 MatLoad_MPIBAIJ, 2775 /*84*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*89*/ 0, 2781 0, 2782 0, 2783 0, 2784 0, 2785 /*94*/ 0, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /*99*/ 0, 2791 0, 2792 0, 2793 0, 2794 0, 2795 /*104*/0, 2796 MatRealPart_MPIBAIJ, 2797 MatImaginaryPart_MPIBAIJ, 2798 0, 2799 0, 2800 /*109*/0, 2801 0, 2802 0, 2803 0, 2804 0, 2805 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2806 0, 2807 MatGetGhosts_MPIBAIJ, 2808 0, 2809 0, 2810 /*119*/0, 2811 0, 2812 0, 2813 0, 2814 MatGetMultiProcBlock_MPIBAIJ, 2815 /*124*/0, 2816 MatGetColumnNorms_MPIBAIJ, 2817 MatInvertBlockDiagonal_MPIBAIJ, 2818 0, 2819 0, 2820 /*129*/ 0, 2821 0, 2822 0, 2823 0, 2824 0, 2825 /*134*/ 0, 2826 0, 2827 0, 2828 0, 2829 0, 2830 /*139*/ 0, 2831 0, 2832 0, 2833 MatFDColoringSetUp_MPIXAIJ, 2834 0, 2835 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2836 }; 2837 2838 #undef __FUNCT__ 
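/*
    The table above is copied into B->ops by MatCreate_MPIBAIJ() below; a zero entry means the
    corresponding operation has no MATMPIBAIJ-specific implementation.
*/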
2839 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2840 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2841 { 2842 PetscFunctionBegin; 2843 *a = ((Mat_MPIBAIJ*)A->data)->A; 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2848 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2851 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2852 { 2853 PetscInt m,rstart,cstart,cend; 2854 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2855 const PetscInt *JJ =0; 2856 PetscScalar *values=0; 2857 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2858 PetscErrorCode ierr; 2859 2860 PetscFunctionBegin; 2861 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2862 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2863 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2864 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2865 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2866 m = B->rmap->n/bs; 2867 rstart = B->rmap->rstart/bs; 2868 cstart = B->cmap->rstart/bs; 2869 cend = B->cmap->rend/bs; 2870 2871 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2872 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2873 for (i=0; i<m; i++) { 2874 nz = ii[i+1] - ii[i]; 2875 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2876 nz_max = PetscMax(nz_max,nz); 2877 JJ = jj + ii[i]; 2878 for (j=0; j<nz; j++) { 2879 if (*JJ >= cstart) break; 2880 JJ++; 2881 } 2882 d = 0; 2883 for (; j<nz; j++) { 2884 if (*JJ++ >= cend) break; 2885 d++; 2886 } 2887 d_nnz[i] = d; 2888 o_nnz[i] = nz - d; 2889 } 2890 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2891 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2892 2893 values = (PetscScalar*)V; 2894 if (!values) { 2895 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2896 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2897 } 2898 for (i=0; i<m; i++) { 2899 PetscInt row = i + rstart; 2900 PetscInt ncols = ii[i+1] - ii[i]; 2901 const PetscInt *icols = jj + ii[i]; 2902 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2903 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2904 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2905 } else { /* block ordering does not match so we can only insert one block at a time. */ 2906 PetscInt j; 2907 for (j=0; j<ncols; j++) { 2908 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2909 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2910 } 2911 } 2912 } 2913 2914 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2915 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2916 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2917 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2918 PetscFunctionReturn(0); 2919 } 2920 2921 #undef __FUNCT__ 2922 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2923 /*@C 2924 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2925 (the default parallel PETSc format). 
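   The arrays i, j, and v describe the locally owned block rows in block compressed row (block CSR)
   format; each entry of v is a dense bs-by-bs block.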
2926 2927 Collective on MPI_Comm 2928 2929 Input Parameters: 2930 + B - the matrix 2931 . bs - the block size 2932 . i - the indices into j for the start of each local row (starts with zero) 2933 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2934 - v - optional values in the matrix 2935 2936 Level: developer 2937 2938 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2939 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2940 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2941 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2942 block column and the second index is over columns within a block. 2943 2944 .keywords: matrix, aij, compressed row, sparse, parallel 2945 2946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2947 @*/ 2948 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2949 { 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2954 PetscValidType(B,1); 2955 PetscValidLogicalCollectiveInt(B,bs,2); 2956 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2957 PetscFunctionReturn(0); 2958 } 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2962 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2963 { 2964 Mat_MPIBAIJ *b; 2965 PetscErrorCode ierr; 2966 PetscInt i; 2967 2968 PetscFunctionBegin; 2969 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2970 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2971 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2972 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2973 2974 if (d_nnz) { 2975 for (i=0; i<B->rmap->n/bs; i++) { 2976 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2977 } 2978 } 2979 if (o_nnz) { 2980 for (i=0; i<B->rmap->n/bs; i++) { 2981 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2982 } 2983 } 2984 2985 b = (Mat_MPIBAIJ*)B->data; 2986 b->bs2 = bs*bs; 2987 b->mbs = B->rmap->n/bs; 2988 b->nbs = B->cmap->n/bs; 2989 b->Mbs = B->rmap->N/bs; 2990 b->Nbs = B->cmap->N/bs; 2991 2992 for (i=0; i<=b->size; i++) { 2993 b->rangebs[i] = B->rmap->range[i]/bs; 2994 } 2995 b->rstartbs = B->rmap->rstart/bs; 2996 b->rendbs = B->rmap->rend/bs; 2997 b->cstartbs = B->cmap->rstart/bs; 2998 b->cendbs = B->cmap->rend/bs; 2999 3000 if (!B->preallocated) { 3001 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3002 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3003 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3004 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3005 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3006 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3007 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3008 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3009 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3010 } 3011 3012 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3013 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3014 B->preallocated = PETSC_TRUE; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3019 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3023 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3024 { 3025 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3026 PetscErrorCode ierr; 3027 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3028 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3029 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3030 3031 PetscFunctionBegin; 3032 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 3033 ii[0] = 0; 3034 for (i=0; i<M; i++) { 3035 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3036 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3037 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3038 /* remove one from count of matrix has diagonal */ 3039 for (j=id[i]; j<id[i+1]; j++) { 3040 if (jd[j] == i) {ii[i+1]--;break;} 3041 } 3042 } 3043 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 3044 cnt = 0; 3045 for (i=0; i<M; i++) { 3046 for (j=io[i]; j<io[i+1]; j++) { 3047 if (garray[jo[j]] > rstart) break; 3048 jj[cnt++] = garray[jo[j]]; 3049 } 3050 for (k=id[i]; k<id[i+1]; k++) { 3051 if (jd[k] != i) { 3052 jj[cnt++] = rstart + jd[k]; 3053 } 3054 } 3055 for (; j<io[i+1]; j++) { 3056 jj[cnt++] = garray[jo[j]]; 3057 } 3058 } 3059 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3064 3065 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3069 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3070 { 3071 PetscErrorCode ierr; 3072 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3073 Mat B; 3074 Mat_MPIAIJ *b; 3075 3076 PetscFunctionBegin; 3077 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3078 3079 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3080 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3081 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3082 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3083 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3084 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3085 b = (Mat_MPIAIJ*) B->data; 3086 3087 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3088 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3089 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3090 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3091 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, 
MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3092 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 if (reuse == MAT_REUSE_MATRIX) { 3097 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3098 } else { 3099 *newmat = B; 3100 } 3101 PetscFunctionReturn(0); 3102 } 3103 3104 /*MC 3105 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3106 3107 Options Database Keys: 3108 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3109 . -mat_block_size <bs> - set the blocksize used to store the matrix 3110 - -mat_use_hash_table <fact> 3111 3112 Level: beginner 3113 3114 .seealso: MatCreateMPIBAIJ 3115 M*/ 3116 3117 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3118 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "MatCreate_MPIBAIJ" 3121 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3122 { 3123 Mat_MPIBAIJ *b; 3124 PetscErrorCode ierr; 3125 PetscBool flg = PETSC_FALSE; 3126 3127 PetscFunctionBegin; 3128 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3129 B->data = (void*)b; 3130 3131 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3132 B->assembled = PETSC_FALSE; 3133 3134 B->insertmode = NOT_SET_VALUES; 3135 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3136 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3137 3138 /* build local table of row and column ownerships */ 3139 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3140 3141 /* build cache for off array entries formed */ 3142 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3143 3144 b->donotstash = PETSC_FALSE; 3145 b->colmap = NULL; 3146 b->garray = NULL; 3147 b->roworiented = PETSC_TRUE; 3148 3149 /* stuff used in block assembly */ 3150 b->barray = 0; 3151 3152 /* stuff used for matrix vector multiply */ 3153 b->lvec = 0; 3154 b->Mvctx = 0; 3155 3156 /* stuff for MatGetRow() */ 3157 b->rowindices = 0; 3158 b->rowvalues = 0; 3159 b->getrowactive = PETSC_FALSE; 3160 3161 /* hash table stuff */ 3162 b->ht = 0; 3163 b->hd = 0; 3164 b->ht_size = 0; 3165 b->ht_flag = PETSC_FALSE; 3166 b->ht_fact = 0; 3167 b->ht_total_ct = 0; 3168 b->ht_insert_ct = 0; 3169 3170 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3171 b->ijonly = PETSC_FALSE; 3172 3173 3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3181 ierr = 
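  /* The "*_C" names composed here (above and below) are the hooks that the type-independent
     front ends look up at run time via PetscTryMethod()/PetscUseMethod(); for example
     MatMPIBAIJSetPreallocation() tries "MatMPIBAIJSetPreallocation_C" and therefore reaches
     MatMPIBAIJSetPreallocation_MPIBAIJ() only when the matrix type is MPIBAIJ. */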
PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3185 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3186 3187 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3188 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 3189 if (flg) { 3190 PetscReal fact = 1.39; 3191 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3192 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3193 if (fact <= 1.0) fact = 1.39; 3194 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3195 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3196 } 3197 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 /*MC 3202 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3203 3204 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3205 and MATMPIBAIJ otherwise. 3206 3207 Options Database Keys: 3208 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3209 3210 Level: beginner 3211 3212 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3213 M*/ 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3217 /*@C 3218 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3219 (block compressed row). For good matrix assembly performance 3220 the user should preallocate the matrix storage by setting the parameters 3221 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3222 performance can be increased by more than a factor of 50. 3223 3224 Collective on Mat 3225 3226 Input Parameters: 3227 + B - the matrix 3228 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3229 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3230 . d_nz - number of block nonzeros per block row in diagonal portion of local 3231 submatrix (same for all local rows) 3232 . d_nnz - array containing the number of block nonzeros in the various block rows 3233 of the in diagonal portion of the local (possibly different for each block 3234 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3235 set it even if it is zero. 3236 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3237 submatrix (same for all local rows). 3238 - o_nnz - array containing the number of nonzeros in the various block rows of the 3239 off-diagonal portion of the local submatrix (possibly different for 3240 each block row) or NULL. 
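   Sample usage (a sketch only; the block size and the constant per-block-row counts shown are
   hypothetical and must be chosen to match the actual sparsity pattern, or d_nnz/o_nnz arrays
   passed instead when the counts vary by block row):
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(B,2,3,NULL,1,NULL);
.ve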
3241 3242 If the *_nnz parameter is given then the *_nz parameter is ignored 3243 3244 Options Database Keys: 3245 + -mat_block_size - size of the blocks to use 3246 - -mat_use_hash_table <fact> 3247 3248 Notes: 3249 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3250 than it must be used on all processors that share the object for that argument. 3251 3252 Storage Information: 3253 For a square global matrix we define each processor's diagonal portion 3254 to be its local rows and the corresponding columns (a square submatrix); 3255 each processor's off-diagonal portion encompasses the remainder of the 3256 local matrix (a rectangular submatrix). 3257 3258 The user can specify preallocated storage for the diagonal part of 3259 the local submatrix with either d_nz or d_nnz (not both). Set 3260 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3261 memory allocation. Likewise, specify preallocated storage for the 3262 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3263 3264 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3265 the figure below we depict these three local rows and all columns (0-11). 3266 3267 .vb 3268 0 1 2 3 4 5 6 7 8 9 10 11 3269 -------------------------- 3270 row 3 |o o o d d d o o o o o o 3271 row 4 |o o o d d d o o o o o o 3272 row 5 |o o o d d d o o o o o o 3273 -------------------------- 3274 .ve 3275 3276 Thus, any entries in the d locations are stored in the d (diagonal) 3277 submatrix, and any entries in the o locations are stored in the 3278 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3279 stored simply in the MATSEQBAIJ format for compressed row storage. 3280 3281 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3282 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3283 In general, for PDE problems in which most nonzeros are near the diagonal, 3284 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3285 or you will get TERRIBLE performance; see the users' manual chapter on 3286 matrices. 3287 3288 You can call MatGetInfo() to get information on how effective the preallocation was; 3289 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3290 You can also run with the option -info and look for messages with the string 3291 malloc in them to see if additional memory allocation was needed. 3292 3293 Level: intermediate 3294 3295 .keywords: matrix, block, aij, compressed row, sparse, parallel 3296 3297 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3298 @*/ 3299 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3300 { 3301 PetscErrorCode ierr; 3302 3303 PetscFunctionBegin; 3304 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3305 PetscValidType(B,1); 3306 PetscValidLogicalCollectiveInt(B,bs,2); 3307 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3308 PetscFunctionReturn(0); 3309 } 3310 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatCreateBAIJ" 3313 /*@C 3314 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3315 (block compressed row). 
For good matrix assembly performance
3316 the user should preallocate the matrix storage by setting the parameters
3317 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3318 performance can be increased by more than a factor of 50.
3319
3320 Collective on MPI_Comm
3321
3322 Input Parameters:
3323 + comm - MPI communicator
3324 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3325 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3326 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3327 This value should be the same as the local size used in creating the
3328 y vector for the matrix-vector product y = Ax.
3329 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3330 This value should be the same as the local size used in creating the
3331 x vector for the matrix-vector product y = Ax.
3332 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3333 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3334 . d_nz - number of nonzero blocks per block row in diagonal portion of local
3335 submatrix (same for all local rows)
3336 . d_nnz - array containing the number of nonzero blocks in the various block rows
3337 in the diagonal portion of the local submatrix (possibly different for each block
3338 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3339 and set it even if it is zero.
3340 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3341 submatrix (same for all local rows).
3342 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3343 off-diagonal portion of the local submatrix (possibly different for
3344 each block row) or NULL.
3345
3346 Output Parameter:
3347 . A - the matrix
3348
3349 Options Database Keys:
3350 + -mat_block_size - size of the blocks to use
3351 - -mat_use_hash_table <fact>
3352
3353 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3354 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3355 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3356
3357 Notes:
3358 If the *_nnz parameter is given then the *_nz parameter is ignored
3359
3360 A nonzero block is any block that has 1 or more nonzeros in it
3361
3362 The user MUST specify either the local or global matrix dimensions
3363 (possibly both).
3364
3365 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3366 then it must be used on all processors that share the object for that argument.
3367
3368 Storage Information:
3369 For a square global matrix we define each processor's diagonal portion
3370 to be its local rows and the corresponding columns (a square submatrix);
3371 each processor's off-diagonal portion encompasses the remainder of the
3372 local matrix (a rectangular submatrix).
3373
3374 The user can specify preallocated storage for the diagonal part of
3375 the local submatrix with either d_nz or d_nnz (not both). Set
3376 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3377 memory allocation. Likewise, specify preallocated storage for the
3378 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
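   A minimal calling sketch (the block size and the constant per-block-row counts here are
   hypothetical; pass d_nnz/o_nnz arrays instead when the counts differ between block rows):
.vb
      Mat A;
      MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,NULL,1,NULL,&A);
.ve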
3379 3380 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3381 the figure below we depict these three local rows and all columns (0-11). 3382 3383 .vb 3384 0 1 2 3 4 5 6 7 8 9 10 11 3385 -------------------------- 3386 row 3 |o o o d d d o o o o o o 3387 row 4 |o o o d d d o o o o o o 3388 row 5 |o o o d d d o o o o o o 3389 -------------------------- 3390 .ve 3391 3392 Thus, any entries in the d locations are stored in the d (diagonal) 3393 submatrix, and any entries in the o locations are stored in the 3394 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3395 stored simply in the MATSEQBAIJ format for compressed row storage. 3396 3397 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3398 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3399 In general, for PDE problems in which most nonzeros are near the diagonal, 3400 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3401 or you will get TERRIBLE performance; see the users' manual chapter on 3402 matrices. 3403 3404 Level: intermediate 3405 3406 .keywords: matrix, block, aij, compressed row, sparse, parallel 3407 3408 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3409 @*/ 3410 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3411 { 3412 PetscErrorCode ierr; 3413 PetscMPIInt size; 3414 3415 PetscFunctionBegin; 3416 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3417 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3418 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3419 if (size > 1) { 3420 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3421 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3422 } else { 3423 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3424 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3425 } 3426 PetscFunctionReturn(0); 3427 } 3428 3429 #undef __FUNCT__ 3430 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3431 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3432 { 3433 Mat mat; 3434 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3435 PetscErrorCode ierr; 3436 PetscInt len=0; 3437 3438 PetscFunctionBegin; 3439 *newmat = 0; 3440 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3441 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3442 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3443 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3444 3445 mat->factortype = matin->factortype; 3446 mat->preallocated = PETSC_TRUE; 3447 mat->assembled = PETSC_TRUE; 3448 mat->insertmode = NOT_SET_VALUES; 3449 3450 a = (Mat_MPIBAIJ*)mat->data; 3451 mat->rmap->bs = matin->rmap->bs; 3452 a->bs2 = oldmat->bs2; 3453 a->mbs = oldmat->mbs; 3454 a->nbs = oldmat->nbs; 3455 a->Mbs = oldmat->Mbs; 3456 a->Nbs = oldmat->Nbs; 3457 3458 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3459 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3460 3461 a->size = oldmat->size; 3462 a->rank = oldmat->rank; 3463 a->donotstash = oldmat->donotstash; 3464 a->roworiented = oldmat->roworiented; 3465 a->rowindices = 0; 3466 a->rowvalues = 0; 3467 a->getrowactive = 
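               /* the duplicate shares layout information with the original but starts with fresh
                  per-use state: the MatGetRow() work arrays, the block assembly buffer and the
                  hash table counters are reset here rather than copied */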
PETSC_FALSE; 3468 a->barray = 0; 3469 a->rstartbs = oldmat->rstartbs; 3470 a->rendbs = oldmat->rendbs; 3471 a->cstartbs = oldmat->cstartbs; 3472 a->cendbs = oldmat->cendbs; 3473 3474 /* hash table stuff */ 3475 a->ht = 0; 3476 a->hd = 0; 3477 a->ht_size = 0; 3478 a->ht_flag = oldmat->ht_flag; 3479 a->ht_fact = oldmat->ht_fact; 3480 a->ht_total_ct = 0; 3481 a->ht_insert_ct = 0; 3482 3483 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3484 if (oldmat->colmap) { 3485 #if defined(PETSC_USE_CTABLE) 3486 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3487 #else 3488 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3489 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3490 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3491 #endif 3492 } else a->colmap = 0; 3493 3494 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3495 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3496 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3497 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3498 } else a->garray = 0; 3499 3500 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3501 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3502 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3503 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3504 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3505 3506 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3507 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3508 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3509 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3510 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3511 *newmat = mat; 3512 PetscFunctionReturn(0); 3513 } 3514 3515 #undef __FUNCT__ 3516 #define __FUNCT__ "MatLoad_MPIBAIJ" 3517 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3518 { 3519 PetscErrorCode ierr; 3520 int fd; 3521 PetscInt i,nz,j,rstart,rend; 3522 PetscScalar *vals,*buf; 3523 MPI_Comm comm; 3524 MPI_Status status; 3525 PetscMPIInt rank,size,maxnz; 3526 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3527 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3528 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3529 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3530 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3531 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3532 3533 PetscFunctionBegin; 3534 /* force binary viewer to load .info file if it has not yet done so */ 3535 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3536 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3537 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3538 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3539 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3540 if (bs < 0) bs = 1; 3541 3542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3544 ierr = 
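  /* The binary file read below has the standard PETSc matrix layout: a four entry header
     (MAT_FILE_CLASSID, M, N, total number of nonzeros), then the nonzero count of every row,
     then all column indices, then all numerical values. Process 0 reads each piece and forwards
     the portions owned by the other processes. */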
PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3545 if (!rank) { 3546 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3547 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3548 } 3549 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3550 M = header[1]; N = header[2]; 3551 3552 /* If global sizes are set, check if they are consistent with that given in the file */ 3553 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 3554 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 3555 3556 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3557 3558 /* 3559 This code adds extra rows to make sure the number of rows is 3560 divisible by the blocksize 3561 */ 3562 Mbs = M/bs; 3563 extra_rows = bs - M + bs*Mbs; 3564 if (extra_rows == bs) extra_rows = 0; 3565 else Mbs++; 3566 if (extra_rows && !rank) { 3567 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3568 } 3569 3570 /* determine ownership of all rows */ 3571 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3572 mbs = Mbs/size + ((Mbs % size) > rank); 3573 m = mbs*bs; 3574 } else { /* User set */ 3575 m = newmat->rmap->n; 3576 mbs = m/bs; 3577 } 3578 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3579 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3580 3581 /* process 0 needs enough room for process with most rows */ 3582 if (!rank) { 3583 mmax = rowners[1]; 3584 for (i=2; i<=size; i++) { 3585 mmax = PetscMax(mmax,rowners[i]); 3586 } 3587 mmax*=bs; 3588 } else mmax = -1; /* unused, but compiler warns anyway */ 3589 3590 rowners[0] = 0; 3591 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3592 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3593 rstart = rowners[rank]; 3594 rend = rowners[rank+1]; 3595 3596 /* distribute row lengths to all processors */ 3597 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3598 if (!rank) { 3599 mend = m; 3600 if (size == 1) mend = mend - extra_rows; 3601 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3602 for (j=mend; j<m; j++) locrowlens[j] = 1; 3603 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3604 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3605 for (j=0; j<m; j++) { 3606 procsnz[0] += locrowlens[j]; 3607 } 3608 for (i=1; i<size; i++) { 3609 mend = browners[i+1] - browners[i]; 3610 if (i == size-1) mend = mend - extra_rows; 3611 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3612 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3613 /* calculate the number of nonzeros on each processor */ 3614 for (j=0; j<browners[i+1]-browners[i]; j++) { 3615 procsnz[i] += rowlengths[j]; 3616 } 3617 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3618 } 3619 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3620 } else { 3621 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3622 } 3623 3624 if (!rank) { 3625 /* determine max buffer needed and allocate it */ 3626 maxnz = procsnz[0]; 3627 for (i=1; i<size; i++) { 3628 maxnz = PetscMax(maxnz,procsnz[i]); 3629 } 3630 
ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3631 3632 /* read in my part of the matrix column indices */ 3633 nz = procsnz[0]; 3634 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3635 mycols = ibuf; 3636 if (size == 1) nz -= extra_rows; 3637 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3638 if (size == 1) { 3639 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3640 } 3641 3642 /* read in every ones (except the last) and ship off */ 3643 for (i=1; i<size-1; i++) { 3644 nz = procsnz[i]; 3645 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3646 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3647 } 3648 /* read in the stuff for the last proc */ 3649 if (size != 1) { 3650 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3651 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3652 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3653 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3654 } 3655 ierr = PetscFree(cols);CHKERRQ(ierr); 3656 } else { 3657 /* determine buffer space needed for message */ 3658 nz = 0; 3659 for (i=0; i<m; i++) { 3660 nz += locrowlens[i]; 3661 } 3662 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3663 mycols = ibuf; 3664 /* receive message of column indices*/ 3665 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3666 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3667 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3668 } 3669 3670 /* loop over local rows, determining number of off diagonal entries */ 3671 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3672 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3673 rowcount = 0; nzcount = 0; 3674 for (i=0; i<mbs; i++) { 3675 dcount = 0; 3676 odcount = 0; 3677 for (j=0; j<bs; j++) { 3678 kmax = locrowlens[rowcount]; 3679 for (k=0; k<kmax; k++) { 3680 tmp = mycols[nzcount++]/bs; 3681 if (!mask[tmp]) { 3682 mask[tmp] = 1; 3683 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3684 else masked1[dcount++] = tmp; 3685 } 3686 } 3687 rowcount++; 3688 } 3689 3690 dlens[i] = dcount; 3691 odlens[i] = odcount; 3692 3693 /* zero out the mask elements we set */ 3694 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3695 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3696 } 3697 3698 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3699 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3700 3701 if (!rank) { 3702 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3703 /* read in my part of the matrix numerical values */ 3704 nz = procsnz[0]; 3705 vals = buf; 3706 mycols = ibuf; 3707 if (size == 1) nz -= extra_rows; 3708 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3709 if (size == 1) { 3710 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3711 } 3712 3713 /* insert into matrix */ 3714 jj = rstart*bs; 3715 for (i=0; i<m; i++) { 3716 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3717 mycols += locrowlens[i]; 3718 vals += locrowlens[i]; 3719 jj++; 3720 } 3721 /* read in other processors (except the last one) and ship out */ 3722 for (i=1; i<size-1; i++) { 3723 nz = procsnz[i]; 3724 vals = buf; 3725 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3726 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3727 } 3728 /* the last proc 
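     (its chunk of values, read next, is padded with 1.0 entries for the artificial rows that
      were added above so the global size is divisible by the block size)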
*/
3729 if (size != 1) {
3730 nz = procsnz[i] - extra_rows;
3731 vals = buf;
3732 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3733 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3734 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3735 }
3736 ierr = PetscFree(procsnz);CHKERRQ(ierr);
3737 } else {
3738 /* receive numeric values */
3739 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3740
3741 /* receive message of values */
3742 vals = buf;
3743 mycols = ibuf;
3744 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3745
3746 /* insert into matrix */
3747 jj = rstart*bs;
3748 for (i=0; i<m; i++) {
3749 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3750 mycols += locrowlens[i];
3751 vals += locrowlens[i];
3752 jj++;
3753 }
3754 }
3755 ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3756 ierr = PetscFree(buf);CHKERRQ(ierr);
3757 ierr = PetscFree(ibuf);CHKERRQ(ierr);
3758 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3759 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3760 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3761 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763 PetscFunctionReturn(0);
3764 }
3765
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3768 /*@
3769 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3770
3771 Input Parameters:
3772 + mat - the matrix
3773 - fact - factor
3774
3775 Not Collective, each process can use a different factor
3776
3777 Level: advanced
3778
3779 Notes:
3780 This can also be set by the command line option: -mat_use_hash_table <fact>
3781
3782 .keywords: matrix, hashtable, factor, HT
3783
3784 .seealso: MatSetOption()
3785 @*/
3786 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3787 {
3788 PetscErrorCode ierr;
3789
3790 PetscFunctionBegin;
3791 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3792 PetscFunctionReturn(0);
3793 }
3794
3795 #undef __FUNCT__
3796 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3797 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3798 {
3799 Mat_MPIBAIJ *baij;
3800
3801 PetscFunctionBegin;
3802 baij = (Mat_MPIBAIJ*)mat->data;
3803 baij->ht_fact = fact;
3804 PetscFunctionReturn(0);
3805 }
3806
3807 #undef __FUNCT__
3808 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3809 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3810 {
3811 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3812
3813 PetscFunctionBegin;
3814 if (Ad) *Ad = a->A;
3815 if (Ao) *Ao = a->B;
3816 if (colmap) *colmap = a->garray;
3817 PetscFunctionReturn(0);
3818 }
3819
3820 /*
3821 Special version for direct calls from Fortran (to eliminate two function call overheads)
3822 */
3823 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3824 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3825 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3826 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3827 #endif
3828
3829 #undef __FUNCT__
3830 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3831 /*@C
3832 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3833
3834 Collective on Mat
3835
3836 Input Parameters:
3837 + mat - the matrix
3838 . min - number of input rows
3839 .
im - input rows 3840 . nin - number of input columns 3841 . in - input columns 3842 . v - numerical values input 3843 - addvin - INSERT_VALUES or ADD_VALUES 3844 3845 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3846 3847 Level: advanced 3848 3849 .seealso: MatSetValuesBlocked() 3850 @*/ 3851 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3852 { 3853 /* convert input arguments to C version */ 3854 Mat mat = *matin; 3855 PetscInt m = *min, n = *nin; 3856 InsertMode addv = *addvin; 3857 3858 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3859 const MatScalar *value; 3860 MatScalar *barray = baij->barray; 3861 PetscBool roworiented = baij->roworiented; 3862 PetscErrorCode ierr; 3863 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3864 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3865 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3866 3867 PetscFunctionBegin; 3868 /* tasks normally handled by MatSetValuesBlocked() */ 3869 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3870 #if defined(PETSC_USE_DEBUG) 3871 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3872 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3873 #endif 3874 if (mat->assembled) { 3875 mat->was_assembled = PETSC_TRUE; 3876 mat->assembled = PETSC_FALSE; 3877 } 3878 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3879 3880 3881 if (!barray) { 3882 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3883 baij->barray = barray; 3884 } 3885 3886 if (roworiented) stepval = (n-1)*bs; 3887 else stepval = (m-1)*bs; 3888 3889 for (i=0; i<m; i++) { 3890 if (im[i] < 0) continue; 3891 #if defined(PETSC_USE_DEBUG) 3892 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3893 #endif 3894 if (im[i] >= rstart && im[i] < rend) { 3895 row = im[i] - rstart; 3896 for (j=0; j<n; j++) { 3897 /* If NumCol = 1 then a copy is not required */ 3898 if ((roworiented) && (n == 1)) { 3899 barray = (MatScalar*)v + i*bs2; 3900 } else if ((!roworiented) && (m == 1)) { 3901 barray = (MatScalar*)v + j*bs2; 3902 } else { /* Here a copy is required */ 3903 if (roworiented) { 3904 value = v + i*(stepval+bs)*bs + j*bs; 3905 } else { 3906 value = v + j*(stepval+bs)*bs + i*bs; 3907 } 3908 for (ii=0; ii<bs; ii++,value+=stepval) { 3909 for (jj=0; jj<bs; jj++) { 3910 *barray++ = *value++; 3911 } 3912 } 3913 barray -=bs2; 3914 } 3915 3916 if (in[j] >= cstart && in[j] < cend) { 3917 col = in[j] - cstart; 3918 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3919 } else if (in[j] < 0) continue; 3920 #if defined(PETSC_USE_DEBUG) 3921 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3922 #endif 3923 else { 3924 if (mat->was_assembled) { 3925 if (!baij->colmap) { 3926 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3927 } 3928 3929 #if defined(PETSC_USE_DEBUG) 3930 #if defined(PETSC_USE_CTABLE) 3931 { PetscInt data; 3932 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3933 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3934 } 3935 #else 3936 if ((baij->colmap[in[j]] - 1) 
% bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3937 #endif 3938 #endif 3939 #if defined(PETSC_USE_CTABLE) 3940 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3941 col = (col - 1)/bs; 3942 #else 3943 col = (baij->colmap[in[j]] - 1)/bs; 3944 #endif 3945 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3946 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3947 col = in[j]; 3948 } 3949 } else col = in[j]; 3950 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3951 } 3952 } 3953 } else { 3954 if (!baij->donotstash) { 3955 if (roworiented) { 3956 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3957 } else { 3958 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3959 } 3960 } 3961 } 3962 } 3963 3964 /* task normally handled by MatSetValuesBlocked() */ 3965 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3966 PetscFunctionReturn(0); 3967 } 3968 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3971 /*@ 3972 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3973 CSR format the local rows. 3974 3975 Collective on MPI_Comm 3976 3977 Input Parameters: 3978 + comm - MPI communicator 3979 . bs - the block size, only a block size of 1 is supported 3980 . m - number of local rows (Cannot be PETSC_DECIDE) 3981 . n - This value should be the same as the local size used in creating the 3982 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3983 calculated if N is given) For square matrices n is almost always m. 3984 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3985 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3986 . i - row indices 3987 . j - column indices 3988 - a - matrix values 3989 3990 Output Parameter: 3991 . mat - the matrix 3992 3993 Level: intermediate 3994 3995 Notes: 3996 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3997 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3998 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3999 4000 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 4001 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 4002 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 4003 with column-major ordering within blocks. 4004 4005 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
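   A small sketch for two rows on a single process with bs = 1 (the arrays shown are hypothetical):
.vb
      PetscInt    ii[] = {0,2,3};
      PetscInt    jj[] = {0,1,1};
      PetscScalar aa[] = {1.0,2.0,3.0};
      Mat         A;

      MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
.ve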
4006
4007 .keywords: matrix, aij, compressed row, sparse, parallel
4008
4009 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4010 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4011 @*/
4012 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4013 {
4014 PetscErrorCode ierr;
4015
4016 PetscFunctionBegin;
4017 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4018 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4019 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4020 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4021 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4022 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4023 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4024 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4025 PetscFunctionReturn(0);
4026 }
4027
4028 #undef __FUNCT__
4029 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
4030 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4031 {
4032 PetscErrorCode ierr;
4033 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
4034 PetscInt *indx;
4035 PetscScalar *values;
4036
4037 PetscFunctionBegin;
4038 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4039 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
4041 PetscInt *dnz,*onz,sum,mbs,Nbs;
4042 PetscInt *bindx,rmax=a->rmax,j;
4043
4044 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4045 mbs = m/bs; Nbs = N/cbs;
4046 if (n == PETSC_DECIDE) {
4047 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
4048 }
4049 /* Check sum(n) = Nbs */
4050 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4051 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
4052
4053 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4054 rstart -= mbs;
4055
4056 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
4057 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
4058 for (i=0; i<mbs; i++) {
4059 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
4060 nnz = nnz/bs;
4061 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
4062 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
4063 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
4064 }
4065 ierr = PetscFree(bindx);CHKERRQ(ierr);
4066
4067 ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4068 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4069 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4070 ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
4071 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
4072 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4073 }
4074
4075 /* numeric phase */
4076 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4077 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4078
4079 for (i=0; i<m; i++) {
4080 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4081 Ii = i + rstart;
4082 ierr =
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4083 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4084 } 4085 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4086 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4087 PetscFunctionReturn(0); 4088 } 4089
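
/*
   Usage sketch (illustrative only; the sizes, indices and values are hypothetical): a typical
   way to build a distributed BAIJ matrix with the routines in this file is to create and
   preallocate it and then insert one bs x bs block at a time with MatSetValuesBlocked(),
   passing block (not point) row and column indices:

      Mat         A;
      PetscInt    row = 0,col = 0;
      PetscScalar block[4] = {1.0,2.0,3.0,4.0};

      MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,2,NULL,1,NULL,&A);
      MatSetValuesBlocked(A,1,&row,1,&col,block,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);
*/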