1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 9 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 10 { 11 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12 PetscErrorCode ierr; 13 PetscInt i,*idxb = 0; 14 PetscScalar *va,*vb; 15 Vec vtmp; 16 17 PetscFunctionBegin; 18 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 19 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 20 if (idx) { 21 for (i=0; i<A->rmap->n; i++) { 22 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 23 } 24 } 25 26 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 27 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 28 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 29 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 30 31 for (i=0; i<A->rmap->n; i++) { 32 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 33 va[i] = vb[i]; 34 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 35 } 36 } 37 38 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 39 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 40 ierr = PetscFree(idxb);CHKERRQ(ierr); 41 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 47 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 48 { 49 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 60 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 61 { 62 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 67 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 /* 72 Local utility routine that creates a mapping from the global column 73 number to the local number in the off-diagonal part of the local 74 storage of the matrix. This is done in a non scalable way since the 75 length of colmap equals the global matrix length. 
76 */ 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 79 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 80 { 81 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83 PetscErrorCode ierr; 84 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 85 86 PetscFunctionBegin; 87 #if defined(PETSC_USE_CTABLE) 88 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 89 for (i=0; i<nbs; i++) { 90 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 91 } 92 #else 93 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 94 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 95 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 \ 104 brow = row/bs; \ 105 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 106 rmax = aimax[brow]; nrow = ailen[brow]; \ 107 bcol = col/bs; \ 108 ridx = row % bs; cidx = col % bs; \ 109 low = 0; high = nrow; \ 110 while (high-low > 3) { \ 111 t = (low+high)/2; \ 112 if (rp[t] > bcol) high = t; \ 113 else low = t; \ 114 } \ 115 for (_i=low; _i<high; _i++) { \ 116 if (rp[_i] > bcol) break; \ 117 if (rp[_i] == bcol) { \ 118 bap = ap + bs2*_i + bs*cidx + ridx; \ 119 if (addv == ADD_VALUES) *bap += value; \ 120 else *bap = value; \ 121 goto a_noinsert; \ 122 } \ 123 } \ 124 if (a->nonew == 1) goto a_noinsert; \ 125 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 126 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 127 N = nrow++ - 1; \ 128 /* shift up all the later entries in this row */ \ 129 for (ii=N; ii>=_i; ii--) { \ 130 rp[ii+1] = rp[ii]; \ 131 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 132 } \ 133 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 134 rp[_i] = bcol; \ 135 ap[bs2*_i + bs*cidx + ridx] = value; \ 136 a_noinsert:; \ 137 ailen[brow] = nrow; \ 138 } 139 140 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 141 { \ 142 brow = row/bs; \ 143 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 144 rmax = bimax[brow]; nrow = bilen[brow]; \ 145 bcol = col/bs; \ 146 ridx = row % bs; cidx = col % bs; \ 147 low = 0; high = nrow; \ 148 while (high-low > 3) { \ 149 t = (low+high)/2; \ 150 if (rp[t] > bcol) high = t; \ 151 else low = t; \ 152 } \ 153 for (_i=low; _i<high; _i++) { \ 154 if (rp[_i] > bcol) break; \ 155 if (rp[_i] == bcol) { \ 156 bap = ap + bs2*_i + bs*cidx + ridx; \ 157 if (addv == ADD_VALUES) *bap += value; \ 158 else *bap = value; \ 159 goto b_noinsert; \ 160 } \ 161 } \ 162 if (b->nonew == 1) goto b_noinsert; \ 163 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 164 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 165 N = nrow++ - 1; \ 166 /* shift up all the later entries in this row */ \ 167 for (ii=N; ii>=_i; ii--) { \ 168 rp[ii+1] = rp[ii]; \ 169 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 170 } \ 171 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 172 rp[_i] = bcol; \ 173 ap[bs2*_i + bs*cidx + ridx] = value; \ 174 b_noinsert:; \ 175 bilen[brow] = nrow; \ 176 } 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "MatSetValues_MPIBAIJ" 180 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 181 { 182 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 183 MatScalar value; 184 PetscBool roworiented = baij->roworiented; 185 PetscErrorCode ierr; 186 PetscInt i,j,row,col; 187 PetscInt rstart_orig=mat->rmap->rstart; 188 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 189 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 190 191 /* Some Variables required in the macro */ 192 Mat A = baij->A; 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 194 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 195 MatScalar *aa =a->a; 196 197 Mat B = baij->B; 198 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 199 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 200 MatScalar *ba =b->a; 201 202 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 203 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 204 MatScalar *ap,*bap; 205 206 PetscFunctionBegin; 207 for (i=0; i<m; i++) { 208 if (im[i] < 0) continue; 209 #if defined(PETSC_USE_DEBUG) 210 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 211 #endif 212 if (im[i] >= rstart_orig && im[i] < rend_orig) { 213 row = im[i] - rstart_orig; 214 for (j=0; j<n; j++) { 215 if (in[j] >= cstart_orig && in[j] < cend_orig) { 216 col = in[j] - cstart_orig; 217 if (roworiented) value = v[i*n+j]; 218 else value = v[i+j*m]; 219 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 220 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 221 } else if (in[j] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 224 #endif 225 else { 226 if (mat->was_assembled) { 227 if (!baij->colmap) { 228 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 229 } 230 #if defined(PETSC_USE_CTABLE) 231 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 232 col = col - 1; 233 #else 234 col = baij->colmap[in[j]/bs] - 1; 235 #endif 236 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 237 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 238 col = in[j]; 239 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 240 B = baij->B; 241 b = (Mat_SeqBAIJ*)(B)->data; 242 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 243 ba =b->a; 244 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 245 else col += in[j]%bs; 246 } else col = in[j]; 247 if (roworiented) value = v[i*n+j]; 248 else value = v[i+j*m]; 249 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 250 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 251 } 252 } 253 } else { 254 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 255 if (!baij->donotstash) { 256 mat->assembled = PETSC_FALSE; 257 if (roworiented) { 258 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 259 } else { 260 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 261 } 262 } 263 } 264 } 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined" 270 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 271 { 272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 273 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 274 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 275 PetscErrorCode ierr; 276 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 277 PetscBool roworiented=a->roworiented; 278 const PetscScalar *value = v; 279 MatScalar *ap,*aa = a->a,*bap; 280 281 PetscFunctionBegin; 282 rp = aj + ai[row]; 283 ap = aa + bs2*ai[row]; 284 rmax = imax[row]; 285 nrow = ailen[row]; 286 low = 0; 287 high = nrow; 288 value = v; 289 low = 0; 290 high = nrow; 291 while (high-low > 7) { 292 t = (low+high)/2; 293 if (rp[t] > col) high = t; 294 else low = t; 295 } 296 for (i=low; i<high; i++) { 297 if (rp[i] > col) break; 298 if (rp[i] == col) { 299 bap = ap + bs2*i; 300 if (roworiented) { 301 if (is == ADD_VALUES) { 302 for (ii=0; ii<bs; ii++) { 303 for (jj=ii; jj<bs2; jj+=bs) { 304 bap[jj] += *value++; 305 } 306 } 307 } else { 308 for (ii=0; ii<bs; ii++) { 309 for (jj=ii; jj<bs2; jj+=bs) { 310 bap[jj] = *value++; 311 } 312 } 313 } 314 } else { 315 if (is == ADD_VALUES) { 316 for (ii=0; ii<bs; ii++,value+=bs) { 317 for (jj=0; jj<bs; jj++) { 318 bap[jj] += value[jj]; 319 } 320 bap += bs; 321 } 322 } else { 323 for (ii=0; ii<bs; ii++,value+=bs) { 324 for (jj=0; jj<bs; jj++) { 325 bap[jj] = value[jj]; 326 } 327 bap += bs; 328 } 329 } 330 } 331 goto noinsert2; 332 } 333 } 334 if (nonew == 1) goto noinsert2; 335 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 336 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 337 N = nrow++ - 1; high++; 338 /* shift up all the later entries in this row */ 339 for (ii=N; ii>=i; ii--) { 340 rp[ii+1] = rp[ii]; 341 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 342 } 343 if (N >= i) { 344 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 345 } 346 rp[i] = col; 347 bap = ap + bs2*i; 348 if (roworiented) { 349 for (ii=0; ii<bs; ii++) { 350 for (jj=ii; jj<bs2; jj+=bs) { 351 bap[jj] = *value++; 352 } 353 } 354 } else { 355 for (ii=0; ii<bs; ii++) { 356 for (jj=0; jj<bs; jj++) { 357 *bap++ = *value++; 358 } 359 } 360 } 361 noinsert2:; 362 ailen[row] = nrow; 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 368 /* 369 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 370 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 371 */ 372 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 373 { 374 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 375 const PetscScalar *value; 376 MatScalar *barray = baij->barray; 377 PetscBool roworiented = baij->roworiented; 378 PetscErrorCode ierr; 379 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 
380 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 381 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 382 383 PetscFunctionBegin; 384 if (!barray) { 385 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 386 baij->barray = barray; 387 } 388 389 if (roworiented) stepval = (n-1)*bs; 390 else stepval = (m-1)*bs; 391 392 for (i=0; i<m; i++) { 393 if (im[i] < 0) continue; 394 #if defined(PETSC_USE_DEBUG) 395 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 396 #endif 397 if (im[i] >= rstart && im[i] < rend) { 398 row = im[i] - rstart; 399 for (j=0; j<n; j++) { 400 /* If NumCol = 1 then a copy is not required */ 401 if ((roworiented) && (n == 1)) { 402 barray = (MatScalar*)v + i*bs2; 403 } else if ((!roworiented) && (m == 1)) { 404 barray = (MatScalar*)v + j*bs2; 405 } else { /* Here a copy is required */ 406 if (roworiented) { 407 value = v + (i*(stepval+bs) + j)*bs; 408 } else { 409 value = v + (j*(stepval+bs) + i)*bs; 410 } 411 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 412 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 413 barray += bs; 414 } 415 barray -= bs2; 416 } 417 418 if (in[j] >= cstart && in[j] < cend) { 419 col = in[j] - cstart; 420 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 421 } else if (in[j] < 0) continue; 422 #if defined(PETSC_USE_DEBUG) 423 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 424 #endif 425 else { 426 if (mat->was_assembled) { 427 if (!baij->colmap) { 428 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 429 } 430 431 #if defined(PETSC_USE_DEBUG) 432 #if defined(PETSC_USE_CTABLE) 433 { PetscInt data; 434 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 435 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 436 } 437 #else 438 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 439 #endif 440 #endif 441 #if defined(PETSC_USE_CTABLE) 442 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 443 col = (col - 1)/bs; 444 #else 445 col = (baij->colmap[in[j]] - 1)/bs; 446 #endif 447 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 448 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 449 col = in[j]; 450 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 451 } else col = in[j]; 452 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 453 } 454 } 455 } else { 456 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 457 if (!baij->donotstash) { 458 if (roworiented) { 459 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 460 } else { 461 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 PetscFunctionReturn(0); 467 } 468 469 #define HASH_KEY 0.6180339887 470 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 471 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 472 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 473 #undef 
__FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval
= (m-1)*bs; 567 568 for (i=0; i<m; i++) { 569 #if defined(PETSC_USE_DEBUG) 570 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 571 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 572 #endif 573 row = im[i]; 574 v_t = v + i*nbs2; 575 if (row >= rstart && row < rend) { 576 for (j=0; j<n; j++) { 577 col = in[j]; 578 579 /* Look up into the Hash Table */ 580 key = row*Nbs+col+1; 581 h1 = HASH(size,key,tmp); 582 583 idx = h1; 584 #if defined(PETSC_USE_DEBUG) 585 total_ct++; 586 insert_ct++; 587 if (HT[idx] != key) { 588 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 589 if (idx == size) { 590 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 591 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 592 } 593 } 594 #else 595 if (HT[idx] != key) { 596 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 597 if (idx == size) { 598 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 599 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 600 } 601 } 602 #endif 603 baij_a = HD[idx]; 604 if (roworiented) { 605 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 606 /* value = v + (i*(stepval+bs)+j)*bs; */ 607 value = v_t; 608 v_t += bs; 609 if (addv == ADD_VALUES) { 610 for (ii=0; ii<bs; ii++,value+=stepval) { 611 for (jj=ii; jj<bs2; jj+=bs) { 612 baij_a[jj] += *value++; 613 } 614 } 615 } else { 616 for (ii=0; ii<bs; ii++,value+=stepval) { 617 for (jj=ii; jj<bs2; jj+=bs) { 618 baij_a[jj] = *value++; 619 } 620 } 621 } 622 } else { 623 value = v + j*(stepval+bs)*bs + i*bs; 624 if (addv == ADD_VALUES) { 625 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 626 for (jj=0; jj<bs; jj++) { 627 baij_a[jj] += *value++; 628 } 629 } 630 } else { 631 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 632 for (jj=0; jj<bs; jj++) { 633 baij_a[jj] = *value++; 634 } 635 } 636 } 637 } 638 } 639 } else { 640 if (!baij->donotstash) { 641 if (roworiented) { 642 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 643 } else { 644 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 645 } 646 } 647 } 648 } 649 #if defined(PETSC_USE_DEBUG) 650 baij->ht_total_ct = total_ct; 651 baij->ht_insert_ct = insert_ct; 652 #endif 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatGetValues_MPIBAIJ" 658 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 659 { 660 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 661 PetscErrorCode ierr; 662 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 663 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 664 665 PetscFunctionBegin; 666 for (i=0; i<m; i++) { 667 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 668 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 669 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 670 row = idxm[i] - bsrstart; 671 for (j=0; j<n; j++) { 672 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 673 if (idxn[j] >= mat->cmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 674 if (idxn[j] >= bscstart && idxn[j] < bscend) { 675 col = idxn[j] - bscstart; 676 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 677 } else { 678 if (!baij->colmap) { 679 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 680 } 681 #if defined(PETSC_USE_CTABLE) 682 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 683 data--; 684 #else 685 data = baij->colmap[idxn[j]/bs]-1; 686 #endif 687 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 688 else { 689 col = data + idxn[j]%bs; 690 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 691 } 692 } 693 } 694 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 695 } 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatNorm_MPIBAIJ" 701 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 702 { 703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 704 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 705 PetscErrorCode ierr; 706 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 707 PetscReal sum = 0.0; 708 MatScalar *v; 709 710 PetscFunctionBegin; 711 if (baij->size == 1) { 712 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 713 } else { 714 if (type == NORM_FROBENIUS) { 715 v = amat->a; 716 nz = amat->nz*bs2; 717 for (i=0; i<nz; i++) { 718 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 719 } 720 v = bmat->a; 721 nz = bmat->nz*bs2; 722 for (i=0; i<nz; i++) { 723 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 724 } 725 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 726 *nrm = PetscSqrtReal(*nrm); 727 } else if (type == NORM_1) { /* max column sum */ 728 PetscReal *tmp,*tmp2; 729 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 730 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 731 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 732 v = amat->a; jj = amat->j; 733 for (i=0; i<amat->nz; i++) { 734 for (j=0; j<bs; j++) { 735 col = bs*(cstart + *jj) + j; /* column index */ 736 for (row=0; row<bs; row++) { 737 tmp[col] += PetscAbsScalar(*v); v++; 738 } 739 } 740 jj++; 741 } 742 v = bmat->a; jj = bmat->j; 743 for (i=0; i<bmat->nz; i++) { 744 for (j=0; j<bs; j++) { 745 col = bs*garray[*jj] + j; 746 for (row=0; row<bs; row++) { 747 tmp[col] += PetscAbsScalar(*v); v++; 748 } 749 } 750 jj++; 751 } 752 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 753 *nrm = 0.0; 754 for (j=0; j<mat->cmap->N; j++) { 755 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 756 } 757 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 758 } else if (type == NORM_INFINITY) { /* max row sum */ 759 PetscReal *sums; 760 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 761 sum = 0.0; 762 for (j=0; j<amat->mbs; j++) { 763 for (row=0; row<bs; row++) sums[row] = 0.0; 764 v = amat->a + bs2*amat->i[j]; 765 nz = amat->i[j+1]-amat->i[j]; 766 for (i=0; i<nz; i++) { 767 for (col=0; col<bs; col++) { 768 for (row=0; row<bs; row++) { 769 sums[row] += PetscAbsScalar(*v); v++; 770 } 771 } 772 } 773 v = bmat->a + bs2*bmat->i[j]; 774 nz = bmat->i[j+1]-bmat->i[j]; 775 for (i=0; i<nz; i++) { 776 for (col=0; col<bs; col++) { 777 for (row=0; row<bs; row++) { 778 sums[row] += PetscAbsScalar(*v); v++; 779 } 780 } 781 } 782 for 
(row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table. The table is created only once;
  if new entries need to be added to the matrix, the hash table has
  to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
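/*
   The routine above builds an open-addressed table: a slot holding 0 is empty,
   keys are stored shifted by +1 so that a valid key is never 0, and collisions
   are resolved by linear probing with wrap-around. The helper below is an
   illustrative sketch only and is not called anywhere in this file; it performs
   the same search that MatSetValues_MPIBAIJ_HT() and
   MatSetValuesBlocked_MPIBAIJ_HT() carry out inline. The name
   MatHashLookup_Sketch and its return-NULL-on-miss behavior are assumptions
   made for illustration; it relies on the HASH() macro defined earlier in this
   file.
*/
PETSC_STATIC_INLINE MatScalar *MatHashLookup_Sketch(Mat_MPIBAIJ *baij,PetscInt brow,PetscInt bcol)
{
  PetscInt  key  = brow*baij->Nbs + bcol + 1; /* global block row/column -> 1-based key */
  PetscInt  size = baij->ht_size,h1,idx;
  PetscReal tmp;

  h1 = HASH(size,key,tmp);                    /* multiplicative (golden-ratio) hash */
  for (idx=h1; idx<size; idx++) if (baij->ht[idx] == key) return baij->hd[idx];
  for (idx=0;  idx<h1;   idx++) if (baij->ht[idx] == key) return baij->hd[idx];
  return NULL;                                /* block (brow,bcol) was not preallocated */
}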
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
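/*
   Caller-side view of the two-phase assembly implemented by the Begin/End pair
   (a sketch, not library code): values set for rows owned by another process
   are accumulated in mat->stash/mat->bstash by MatSetValues()/
   MatSetValuesBlocked(), shipped off in MatAssemblyBegin(), and applied on the
   owning processes in MatAssemblyEnd():

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   grow, gcol, and val are placeholder names for a global row index, a global
   column index, and a value that may belong to a row owned by another process.
*/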
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
PetscErrorCode ierr; 1038 PetscMPIInt rank = baij->rank; 1039 PetscInt bs = mat->rmap->bs; 1040 PetscBool iascii,isdraw; 1041 PetscViewer sviewer; 1042 PetscViewerFormat format; 1043 1044 PetscFunctionBegin; 1045 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1046 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1047 if (iascii) { 1048 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1049 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1050 MatInfo info; 1051 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1052 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1055 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1056 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1057 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1058 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1059 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1060 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1061 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1062 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1063 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1066 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1069 PetscFunctionReturn(0); 1070 } 1071 } 1072 1073 if (isdraw) { 1074 PetscDraw draw; 1075 PetscBool isnull; 1076 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1077 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1078 } 1079 1080 { 1081 /* assemble the entire matrix onto first processor. */ 1082 Mat A; 1083 Mat_SeqBAIJ *Aloc; 1084 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1085 MatScalar *a; 1086 const char *matname; 1087 1088 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1089 /* Perhaps this should be the type of mat? 
*/ 1090 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1091 if (!rank) { 1092 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1093 } else { 1094 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1095 } 1096 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1097 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1098 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1099 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1100 1101 /* copy over the A part */ 1102 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1103 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1104 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1105 1106 for (i=0; i<mbs; i++) { 1107 rvals[0] = bs*(baij->rstartbs + i); 1108 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1109 for (j=ai[i]; j<ai[i+1]; j++) { 1110 col = (baij->cstartbs+aj[j])*bs; 1111 for (k=0; k<bs; k++) { 1112 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1113 col++; a += bs; 1114 } 1115 } 1116 } 1117 /* copy over the B part */ 1118 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1119 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1120 for (i=0; i<mbs; i++) { 1121 rvals[0] = bs*(baij->rstartbs + i); 1122 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1123 for (j=ai[i]; j<ai[i+1]; j++) { 1124 col = baij->garray[aj[j]]*bs; 1125 for (k=0; k<bs; k++) { 1126 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1127 col++; a += bs; 1128 } 1129 } 1130 } 1131 ierr = PetscFree(rvals);CHKERRQ(ierr); 1132 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1133 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1134 /* 1135 Everyone has to call to draw the matrix since the graphics waits are 1136 synchronized across all processors that share the PetscDraw object 1137 */ 1138 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1139 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1140 if (!rank) { 1141 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1142 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1143 } 1144 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1145 ierr = MatDestroy(&A);CHKERRQ(ierr); 1146 } 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1152 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1153 { 1154 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1155 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1156 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1157 PetscErrorCode ierr; 1158 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1159 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1160 int fd; 1161 PetscScalar *column_values; 1162 FILE *file; 1163 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1164 PetscInt message_count,flowcontrolcount; 1165 1166 PetscFunctionBegin; 1167 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1168 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1169 nz = bs2*(A->nz + B->nz); 1170 rlen = mat->rmap->n; 1171 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1172 if (!rank) { 1173 header[0] = MAT_FILE_CLASSID; 1174 header[1] = mat->rmap->N; 1175 header[2] = mat->cmap->N; 1176 1177 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1179 /* get largest number of rows any processor has */ 1180 range = mat->rmap->range; 1181 for (i=1; i<size; i++) { 1182 rlen = PetscMax(rlen,range[i+1] - range[i]); 1183 } 1184 } else { 1185 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1186 } 1187 1188 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1189 /* compute lengths of each row */ 1190 for (i=0; i<a->mbs; i++) { 1191 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1192 } 1193 /* store the row lengths to the file */ 1194 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1195 if (!rank) { 1196 MPI_Status status; 1197 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1198 rlen = (range[1] - range[0])/bs; 1199 for (i=0; i<rlen; i++) { 1200 for (j=0; j<bs; j++) { 1201 row_lens[i*bs+j] = bs*crow_lens[i]; 1202 } 1203 } 1204 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 for (i=1; i<size; i++) { 1206 rlen = (range[i+1] - range[i])/bs; 1207 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1209 for (k=0; k<rlen; k++) { 1210 for (j=0; j<bs; j++) { 1211 row_lens[k*bs+j] = bs*crow_lens[k]; 1212 } 1213 } 1214 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1215 } 1216 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1217 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1218 } else { 1219 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1220 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1221 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1222 } 1223 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1224 1225 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1226 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1227 the communication needed for the nonzero values */ 1228 nzmax = nz; /* space a largest processor needs */ 1229 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1231 cnt = 0; 1232 for (i=0; i<a->mbs; i++) { 1233 pcnt = cnt; 1234 for (j=B->i[i]; j<B->i[i+1]; j++) { 1235 if ((col = garray[B->j[j]]) > cstart) break; 1236 for (l=0; l<bs; l++) { 1237 column_indices[cnt++] = bs*col+l; 1238 } 1239 } 1240 for (k=A->i[i]; k<A->i[i+1]; k++) { 1241 for (l=0; l<bs; l++) { 1242 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1243 } 1244 } 1245 for (; j<B->i[i+1]; j++) { 1246 for (l=0; l<bs; l++) { 1247 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1248 } 1249 } 1250 len = cnt - pcnt; 1251 for (k=1; k<bs; k++) { 1252 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1253 cnt += len; 1254 } 1255 } 1256 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1257 1258 /* store the columns to the file */ 1259 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1260 if (!rank) { 1261 MPI_Status status; 1262 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1263 for (i=1; i<size; i++) { 1264 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1265 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1266 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1268 } 1269 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1270 } else { 1271 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1272 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1273 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1274 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1275 } 1276 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1277 1278 /* load up the numerical values */ 1279 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1280 cnt = 0; 1281 for (i=0; i<a->mbs; i++) { 1282 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1283 for (j=B->i[i]; j<B->i[i+1]; j++) { 1284 if (garray[B->j[j]] > cstart) break; 1285 for (l=0; l<bs; l++) { 1286 for (ll=0; ll<bs; ll++) { 1287 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1288 } 1289 } 1290 cnt += bs; 1291 } 1292 for (k=A->i[i]; k<A->i[i+1]; k++) { 1293 for (l=0; l<bs; l++) { 1294 for (ll=0; ll<bs; ll++) { 1295 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1296 } 1297 } 1298 cnt += bs; 1299 } 1300 for (; j<B->i[i+1]; j++) { 1301 for (l=0; l<bs; l++) { 1302 for (ll=0; ll<bs; ll++) { 1303 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1304 } 1305 } 1306 cnt += bs; 1307 } 1308 cnt += (bs-1)*rlen; 1309 } 1310 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1311 1312 /* store the column values to the file */ 1313 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1314 if (!rank) { 1315 MPI_Status status; 1316 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1317 for (i=1; i<size; i++) { 1318 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1319 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1320 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1321 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1322 } 1323 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1326 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1327 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1328 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1329 } 1330 ierr = PetscFree(column_values);CHKERRQ(ierr); 1331 1332 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1333 if (file) { 1334 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1335 } 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatView_MPIBAIJ" 1341 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1342 { 1343 PetscErrorCode ierr; 1344 PetscBool iascii,isdraw,issocket,isbinary; 1345 1346 PetscFunctionBegin; 1347 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1348 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1349 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1350 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1351 if (iascii || isdraw || issocket) { 1352 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1353 } else if (isbinary) { 1354 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1361 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1362 { 1363 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 #if defined(PETSC_USE_LOG) 1368 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1369 #endif 1370 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1371 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1372 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1373 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1374 #if defined(PETSC_USE_CTABLE) 1375 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1376 #else 1377 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1378 #endif 1379 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1380 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1381 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1382 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1383 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1384 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1385 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1386 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1387 1388 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1389 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1390 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin()
but we need to call this nop */ 1462 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1469 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1470 { 1471 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1472 PetscErrorCode ierr; 1473 1474 PetscFunctionBegin; 1475 /* do nondiagonal part */ 1476 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1477 /* send it on its way */ 1478 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1479 /* do local part */ 1480 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1481 /* receive remote parts: note this assumes the values are not actually */ 1482 /* inserted in yy until the next line, which is true for my implementation*/ 1483 /* but is not perhaps always true. */ 1484 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 /* 1489 This only works correctly for square matrices where the subblock A->A is the 1490 diagonal block 1491 */ 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1494 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1495 { 1496 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1497 PetscErrorCode ierr; 1498 1499 PetscFunctionBegin; 1500 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1501 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatScale_MPIBAIJ" 1507 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1508 { 1509 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1510 PetscErrorCode ierr; 1511 1512 PetscFunctionBegin; 1513 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1514 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1520 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1523 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1524 PetscErrorCode ierr; 1525 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1526 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1527 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1528 1529 PetscFunctionBegin; 1530 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1531 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1532 mat->getrowactive = PETSC_TRUE; 1533 1534 if (!mat->rowvalues && (idx || v)) { 1535 /* 1536 allocate enough space to hold information from the longest row. 
1537 */ 1538 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1539 PetscInt max = 1,mbs = mat->mbs,tmp; 1540 for (i=0; i<mbs; i++) { 1541 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1542 if (max < tmp) max = tmp; 1543 } 1544 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1545 } 1546 lrow = row - brstart; 1547 1548 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1549 if (!v) {pvA = 0; pvB = 0;} 1550 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1551 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1552 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1553 nztot = nzA + nzB; 1554 1555 cmap = mat->garray; 1556 if (v || idx) { 1557 if (nztot) { 1558 /* Sort by increasing column numbers, assuming A and B already sorted */ 1559 PetscInt imark = -1; 1560 if (v) { 1561 *v = v_p = mat->rowvalues; 1562 for (i=0; i<nzB; i++) { 1563 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1564 else break; 1565 } 1566 imark = i; 1567 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1568 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1569 } 1570 if (idx) { 1571 *idx = idx_p = mat->rowindices; 1572 if (imark > -1) { 1573 for (i=0; i<imark; i++) { 1574 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1575 } 1576 } else { 1577 for (i=0; i<nzB; i++) { 1578 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1579 else break; 1580 } 1581 imark = i; 1582 } 1583 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1584 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1585 } 1586 } else { 1587 if (idx) *idx = 0; 1588 if (v) *v = 0; 1589 } 1590 } 1591 *nz = nztot; 1592 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1599 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1600 { 1601 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1602 1603 PetscFunctionBegin; 1604 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1605 baij->getrowactive = PETSC_FALSE; 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1611 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1612 { 1613 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1618 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1624 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1625 { 1626 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1627 Mat A = a->A,B = a->B; 1628 PetscErrorCode ierr; 1629 PetscReal isend[5],irecv[5]; 1630 1631 PetscFunctionBegin; 1632 info->block_size = (PetscReal)matin->rmap->bs; 1633 1634 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1635 1636 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1637 isend[3] = info->memory; isend[4] = info->mallocs; 1638 1639 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1640 1641 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1642 isend[3] += info->memory; 
isend[4] += info->mallocs; 1643 1644 if (flag == MAT_LOCAL) { 1645 info->nz_used = isend[0]; 1646 info->nz_allocated = isend[1]; 1647 info->nz_unneeded = isend[2]; 1648 info->memory = isend[3]; 1649 info->mallocs = isend[4]; 1650 } else if (flag == MAT_GLOBAL_MAX) { 1651 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } else if (flag == MAT_GLOBAL_SUM) { 1659 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1660 1661 info->nz_used = irecv[0]; 1662 info->nz_allocated = irecv[1]; 1663 info->nz_unneeded = irecv[2]; 1664 info->memory = irecv[3]; 1665 info->mallocs = irecv[4]; 1666 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1667 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1668 info->fill_ratio_needed = 0; 1669 info->factor_mallocs = 0; 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1675 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1676 { 1677 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 switch (op) { 1682 case MAT_NEW_NONZERO_LOCATIONS: 1683 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1684 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1685 case MAT_KEEP_NONZERO_PATTERN: 1686 case MAT_NEW_NONZERO_LOCATION_ERR: 1687 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1688 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_ROW_ORIENTED: 1691 a->roworiented = flg; 1692 1693 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1694 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1695 break; 1696 case MAT_NEW_DIAGONALS: 1697 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1698 break; 1699 case MAT_IGNORE_OFF_PROC_ENTRIES: 1700 a->donotstash = flg; 1701 break; 1702 case MAT_USE_HASH_TABLE: 1703 a->ht_flag = flg; 1704 break; 1705 case MAT_SYMMETRIC: 1706 case MAT_STRUCTURALLY_SYMMETRIC: 1707 case MAT_HERMITIAN: 1708 case MAT_SYMMETRY_ETERNAL: 1709 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1710 break; 1711 default: 1712 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1719 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1720 { 1721 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1722 Mat_SeqBAIJ *Aloc; 1723 Mat B; 1724 PetscErrorCode ierr; 1725 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1726 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1727 MatScalar *a; 1728 1729 PetscFunctionBegin; 1730 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1731 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1732 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1733 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1734 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1735 /* Do not know preallocation information, but must set block size */ 1736 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1737 } else { 
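    /* MAT_REUSE_MATRIX with an output matrix different from A: fill the matrix supplied by the caller */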
1738 B = *matout; 1739 } 1740 1741 /* copy over the A part */ 1742 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1743 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1744 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1745 1746 for (i=0; i<mbs; i++) { 1747 rvals[0] = bs*(baij->rstartbs + i); 1748 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1749 for (j=ai[i]; j<ai[i+1]; j++) { 1750 col = (baij->cstartbs+aj[j])*bs; 1751 for (k=0; k<bs; k++) { 1752 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1753 1754 col++; a += bs; 1755 } 1756 } 1757 } 1758 /* copy over the B part */ 1759 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1760 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1761 for (i=0; i<mbs; i++) { 1762 rvals[0] = bs*(baij->rstartbs + i); 1763 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1764 for (j=ai[i]; j<ai[i+1]; j++) { 1765 col = baij->garray[aj[j]]*bs; 1766 for (k=0; k<bs; k++) { 1767 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1768 col++; 1769 a += bs; 1770 } 1771 } 1772 } 1773 ierr = PetscFree(rvals);CHKERRQ(ierr); 1774 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1775 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1776 1777 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1778 else { 1779 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1786 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1787 { 1788 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1789 Mat a = baij->A,b = baij->B; 1790 PetscErrorCode ierr; 1791 PetscInt s1,s2,s3; 1792 1793 PetscFunctionBegin; 1794 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1795 if (rr) { 1796 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1797 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1798 /* Overlap communication with computation. 
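     The forward scatter of rr into baij->lvec is started here and only completed after the
     diagonal block has been scaled, so the off-diagonal block can then be right-scaled with
     the gathered ghost values.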
*/ 1799 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1800 } 1801 if (ll) { 1802 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1803 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1804 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1805 } 1806 /* scale the diagonal block */ 1807 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1808 1809 if (rr) { 1810 /* Do a scatter end and then right scale the off-diagonal block */ 1811 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1812 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1813 } 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1819 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1820 { 1821 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1822 PetscInt *owners = A->rmap->range; 1823 PetscInt n = A->rmap->n; 1824 PetscSF sf; 1825 PetscInt *lrows; 1826 PetscSFNode *rrows; 1827 PetscInt r, p = 0, len = 0; 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 /* Create SF where leaves are input rows and roots are owned rows */ 1832 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1833 for (r = 0; r < n; ++r) lrows[r] = -1; 1834 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1835 for (r = 0; r < N; ++r) { 1836 const PetscInt idx = rows[r]; 1837 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1838 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1839 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1840 } 1841 if (A->nooffproczerorows) { 1842 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1843 lrows[len++] = idx - owners[p]; 1844 } else { 1845 rrows[r].rank = p; 1846 rrows[r].index = rows[r] - owners[p]; 1847 } 1848 } 1849 if (!A->nooffproczerorows) { 1850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1852 /* Collect flags for rows to be zeroed */ 1853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1856 /* Compress and put in row numbers */ 1857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1858 } 1859 /* fix right hand side if needed */ 1860 if (x && b) { 1861 const PetscScalar *xx; 1862 PetscScalar *bb; 1863 1864 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1865 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1866 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1867 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1868 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1869 } 1870 1871 /* actually zap the local rows */ 1872 /* 1873 Zero the required rows. If the "diagonal block" of the matrix 1874 is square and the user wishes to set the diagonal we use separate 1875 code so that MatSetValues() is not called for each diagonal allocating 1876 new memory, thus calling lots of mallocs and slowing things down. 
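    When the diagonal block is square the diagonal value is inserted while the rows are zeroed;
    otherwise the rows are zeroed first and each requested diagonal entry is added afterwards with
    MatSetValues(), followed by an extra assembly.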
1877 1878 */ 1879 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1880 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1881 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1882 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1883 } else if (diag != 0.0) { 1884 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1885 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1886 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1887 for (r = 0; r < len; ++r) { 1888 const PetscInt row = lrows[r] + A->rmap->rstart; 1889 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1890 } 1891 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1892 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1893 } else { 1894 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1895 } 1896 ierr = PetscFree(lrows);CHKERRQ(ierr); 1897 1898 /* only change matrix nonzero state if pattern was allowed to be changed */ 1899 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1900 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1901 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1908 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1909 { 1910 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1911 PetscErrorCode ierr; 1912 PetscMPIInt n = A->rmap->n; 1913 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1914 PetscInt *lrows,*owners = A->rmap->range; 1915 PetscSFNode *rrows; 1916 PetscSF sf; 1917 const PetscScalar *xx; 1918 PetscScalar *bb,*mask; 1919 Vec xmask,lmask; 1920 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1921 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1922 PetscScalar *aa; 1923 1924 PetscFunctionBegin; 1925 /* Create SF where leaves are input rows and roots are owned rows */ 1926 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1927 for (r = 0; r < n; ++r) lrows[r] = -1; 1928 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1929 for (r = 0; r < N; ++r) { 1930 const PetscInt idx = rows[r]; 1931 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1932 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1933 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1934 } 1935 rrows[r].rank = p; 1936 rrows[r].index = rows[r] - owners[p]; 1937 } 1938 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1939 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1940 /* Collect flags for rows to be zeroed */ 1941 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1942 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1943 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1944 /* Compress and put in row numbers */ 1945 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1946 /* zero diagonal part of matrix */ 1947 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1948 /* 
handle off diagonal part of matrix */ 1949 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1950 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1951 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1952 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1953 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1954 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1955 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1957 if (x) { 1958 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1959 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1960 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1961 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1962 } 1963 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1964 /* remove zeroed rows of off diagonal matrix */ 1965 for (i = 0; i < len; ++i) { 1966 row = lrows[i]; 1967 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1968 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1969 for (k = 0; k < count; ++k) { 1970 aa[0] = 0.0; 1971 aa += bs; 1972 } 1973 } 1974 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1975 for (i = 0; i < l->B->rmap->N; ++i) { 1976 row = i/bs; 1977 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1978 for (k = 0; k < bs; ++k) { 1979 col = bs*baij->j[j] + k; 1980 if (PetscAbsScalar(mask[col])) { 1981 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1982 if (b) bb[i] -= aa[0]*xx[col]; 1983 aa[0] = 0.0; 1984 } 1985 } 1986 } 1987 } 1988 if (x) { 1989 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1990 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1991 } 1992 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1993 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1994 ierr = PetscFree(lrows);CHKERRQ(ierr); 1995 1996 /* only change matrix nonzero state if pattern was allowed to be changed */ 1997 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1998 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1999 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 2006 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 2007 { 2008 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatEqual_MPIBAIJ" 2020 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 2021 { 2022 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 2023 Mat a,b,c,d; 2024 PetscBool flg; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2032 if (flg) { 2033 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2034 } 2035 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatCopy_MPIBAIJ" 2041 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 2042 { 2043 PetscErrorCode ierr; 2044 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
2045 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2046 2047 PetscFunctionBegin; 2048 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2049 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2050 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2051 } else { 2052 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2053 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatSetUp_MPIBAIJ" 2060 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 2061 { 2062 PetscErrorCode ierr; 2063 2064 PetscFunctionBegin; 2065 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 2071 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2072 { 2073 PetscErrorCode ierr; 2074 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 2075 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2076 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2077 2078 PetscFunctionBegin; 2079 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatAXPY_MPIBAIJ" 2085 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2086 { 2087 PetscErrorCode ierr; 2088 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 2089 PetscBLASInt bnz,one=1; 2090 Mat_SeqBAIJ *x,*y; 2091 2092 PetscFunctionBegin; 2093 if (str == SAME_NONZERO_PATTERN) { 2094 PetscScalar alpha = a; 2095 x = (Mat_SeqBAIJ*)xx->A->data; 2096 y = (Mat_SeqBAIJ*)yy->A->data; 2097 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2098 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2099 x = (Mat_SeqBAIJ*)xx->B->data; 2100 y = (Mat_SeqBAIJ*)yy->B->data; 2101 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2102 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2103 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2104 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2105 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2106 } else { 2107 Mat B; 2108 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2109 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2110 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2111 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2112 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2113 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2114 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2115 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2116 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2117 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2118 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2119 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
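       B has been preallocated above for the union of the nonzero patterns of Y and X; it is filled
       below and then swapped into Y with MatHeaderReplace().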
*/ 2120 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2121 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2122 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2123 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2130 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2131 { 2132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2137 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2143 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2144 { 2145 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2150 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2151 PetscFunctionReturn(0); 2152 } 2153 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2156 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2157 { 2158 PetscErrorCode ierr; 2159 IS iscol_local; 2160 PetscInt csize; 2161 2162 PetscFunctionBegin; 2163 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2164 if (call == MAT_REUSE_MATRIX) { 2165 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2166 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2167 } else { 2168 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2169 } 2170 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2171 if (call == MAT_INITIAL_MATRIX) { 2172 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2173 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2174 } 2175 PetscFunctionReturn(0); 2176 } 2177 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2180 /* 2181 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2182 in local and then by concatenating the local matrices the end result. 2183 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 2184 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 2185 */ 2186 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2187 { 2188 PetscErrorCode ierr; 2189 PetscMPIInt rank,size; 2190 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2191 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2192 Mat M,Mreuse; 2193 MatScalar *vwork,*aa; 2194 MPI_Comm comm; 2195 IS isrow_new, iscol_new; 2196 PetscBool idflag,allrows, allcols; 2197 Mat_SeqBAIJ *aij; 2198 2199 PetscFunctionBegin; 2200 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2201 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2202 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2203 /* The compression and expansion should be avoided. 
Doesn't point 2204 out errors, might change the indices, hence buggey */ 2205 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2206 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2207 2208 /* Check for special case: each processor gets entire matrix columns */ 2209 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2210 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2211 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2212 else allcols = PETSC_FALSE; 2213 2214 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2215 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2216 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2217 else allrows = PETSC_FALSE; 2218 2219 if (call == MAT_REUSE_MATRIX) { 2220 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2221 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2222 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2223 } else { 2224 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2225 } 2226 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2227 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2228 /* 2229 m - number of local rows 2230 n - number of columns (same on all processors) 2231 rstart - first row in new global matrix generated 2232 */ 2233 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2234 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2235 m = m/bs; 2236 n = n/bs; 2237 2238 if (call == MAT_INITIAL_MATRIX) { 2239 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2240 ii = aij->i; 2241 jj = aij->j; 2242 2243 /* 2244 Determine the number of non-zeros in the diagonal and off-diagonal 2245 portions of the matrix in order to do correct preallocation 2246 */ 2247 2248 /* first get start and end of "diagonal" columns */ 2249 if (csize == PETSC_DECIDE) { 2250 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2251 if (mglobal == n*bs) { /* square matrix */ 2252 nlocal = m; 2253 } else { 2254 nlocal = n/size + ((n % size) > rank); 2255 } 2256 } else { 2257 nlocal = csize/bs; 2258 } 2259 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2260 rstart = rend - nlocal; 2261 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2262 2263 /* next, compute all the lengths */ 2264 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2265 for (i=0; i<m; i++) { 2266 jend = ii[i+1] - ii[i]; 2267 olen = 0; 2268 dlen = 0; 2269 for (j=0; j<jend; j++) { 2270 if (*jj < rstart || *jj >= rend) olen++; 2271 else dlen++; 2272 jj++; 2273 } 2274 olens[i] = olen; 2275 dlens[i] = dlen; 2276 } 2277 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2278 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2279 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2280 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2281 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2282 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2283 } else { 2284 PetscInt ml,nl; 2285 2286 M = *newmat; 2287 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2288 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 2289 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2290 /* 2291 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2292 rather than the slower MatSetValues(). 2293 */ 2294 M->was_assembled = PETSC_TRUE; 2295 M->assembled = PETSC_FALSE; 2296 } 2297 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2298 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2299 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2300 ii = aij->i; 2301 jj = aij->j; 2302 aa = aij->a; 2303 for (i=0; i<m; i++) { 2304 row = rstart/bs + i; 2305 nz = ii[i+1] - ii[i]; 2306 cwork = jj; jj += nz; 2307 vwork = aa; aa += nz*bs*bs; 2308 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2309 } 2310 2311 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2312 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 *newmat = M; 2314 2315 /* save submatrix used in processor for next request */ 2316 if (call == MAT_INITIAL_MATRIX) { 2317 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2318 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2319 } 2320 PetscFunctionReturn(0); 2321 } 2322 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatPermute_MPIBAIJ" 2325 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2326 { 2327 MPI_Comm comm,pcomm; 2328 PetscInt clocal_size,nrows; 2329 const PetscInt *rows; 2330 PetscMPIInt size; 2331 IS crowp,lcolp; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2336 /* make a collective version of 'rowp' */ 2337 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2338 if (pcomm==comm) { 2339 crowp = rowp; 2340 } else { 2341 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2342 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2343 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2344 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2345 } 2346 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2347 /* make a local version of 'colp' */ 2348 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2349 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2350 if (size==1) { 2351 lcolp = colp; 2352 } else { 2353 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2354 } 2355 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2356 /* now we just get the submatrix */ 2357 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2358 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2359 /* clean up */ 2360 if (pcomm!=comm) { 2361 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2362 } 2363 if (size>1) { 2364 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2371 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2372 { 2373 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2374 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2375 2376 PetscFunctionBegin; 2377 if (nghosts) *nghosts = B->nbs; 2378 if (ghosts) *ghosts = baij->garray; 2379 PetscFunctionReturn(0); 2380 } 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2384 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2385 { 2386 Mat B; 2387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2388 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2389 Mat_SeqAIJ *b; 2390 PetscErrorCode ierr; 2391 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2392 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2393 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2394 2395 PetscFunctionBegin; 2396 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2397 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2398 2399 /* ---------------------------------------------------------------- 2400 Tell every processor the number of nonzeros per row 2401 */ 2402 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2403 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2404 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2405 } 2406 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2407 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2408 displs = recvcounts + size; 2409 for (i=0; i<size; i++) { 2410 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2411 displs[i] = A->rmap->range[i]/bs; 2412 } 2413 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2414 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2415 #else 2416 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2417 #endif 2418 /* --------------------------------------------------------------- 2419 Create the sequential matrix of the same type as the local block diagonal 2420 */ 2421 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2422 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2423 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2424 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2425 b = (Mat_SeqAIJ*)B->data; 2426 2427 /*-------------------------------------------------------------------- 2428 Copy my part of matrix column indices over 2429 */ 2430 sendcount = ad->nz + bd->nz; 2431 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2432 a_jsendbuf = ad->j; 2433 b_jsendbuf = bd->j; 2434 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2435 cnt = 0; 2436 for (i=0; i<n; i++) { 2437 2438 /* put in lower diagonal portion */ 2439 m = bd->i[i+1] - bd->i[i]; 2440 while (m > 0) { 2441 /* is it above diagonal (in bd (compressed) numbering) */ 2442 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2443 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2444 m--; 2445 } 2446 2447 /* put in diagonal portion */ 2448 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2449 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2450 } 2451 2452 /* put in upper diagonal portion */ 2453 while (m-- > 0) { 2454 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2455 } 2456 } 2457 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2458 2459 /*-------------------------------------------------------------------- 2460 Gather all column indices to all processors 2461 */ 2462 for (i=0; i<size; i++) { 2463 recvcounts[i] = 0; 2464 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2465 recvcounts[i] += lens[j]; 2466 } 2467 } 2468 displs[0] = 0; 2469 for (i=1; i<size; i++) { 2470 displs[i] = displs[i-1] + recvcounts[i-1]; 2471 } 2472 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2473 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2474 #else 2475 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2476 #endif 2477 /*-------------------------------------------------------------------- 2478 Assemble the matrix into useable form (note numerical values not yet set) 2479 */ 2480 /* set the b->ilen (length of each row) values */ 2481 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2482 /* set the b->i indices */ 2483 b->i[0] = 0; 2484 for (i=1; i<=A->rmap->N/bs; i++) { 2485 b->i[i] = b->i[i-1] + lens[i-1]; 2486 } 2487 ierr = PetscFree(lens);CHKERRQ(ierr); 2488 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2491 2492 if (A->symmetric) { 2493 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2494 } else if (A->hermitian) { 2495 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2496 } else if (A->structurally_symmetric) { 2497 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2498 } 2499 *newmat = B; 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatSOR_MPIBAIJ" 2505 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2506 { 2507 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2508 PetscErrorCode ierr; 2509 Vec bb1 = 0; 2510 2511 PetscFunctionBegin; 2512 if (flag == SOR_APPLY_UPPER) { 2513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2518 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2519 } 2520 2521 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2522 if (flag & SOR_ZERO_INITIAL_GUESS) { 2523 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2524 its--; 2525 } 2526 2527 while (its--) { 2528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2530 2531 /* update rhs: bb1 = bb - B*x */ 2532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2534 2535 /* local sweep */ 2536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2537 } 2538 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2539 if (flag & SOR_ZERO_INITIAL_GUESS) { 2540 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2541 its--; 2542 } 2543 while (its--) { 2544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2546 2547 /* update rhs: bb1 = bb - B*x */ 2548 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2549 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2550 2551 /* local sweep */ 2552 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2553 } 2554 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2555 if (flag & SOR_ZERO_INITIAL_GUESS) { 2556 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2557 its--; 2558 } 2559 while (its--) { 2560 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2561 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2562 2563 /* update rhs: bb1 = bb - B*x */ 2564 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2565 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2566 2567 /* local sweep */ 2568 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2569 } 2570 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2571 2572 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2578 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2579 { 2580 PetscErrorCode ierr; 2581 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2582 PetscInt N,i,*garray = aij->garray; 2583 PetscInt ib,jb,bs = A->rmap->bs; 2584 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2585 MatScalar *a_val = a_aij->a; 2586 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2587 MatScalar *b_val = b_aij->a; 2588 PetscReal *work; 2589 2590 PetscFunctionBegin; 2591 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2592 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2593 if (type == NORM_2) { 2594 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2595 for (jb=0; jb<bs; jb++) { 2596 for (ib=0; ib<bs; ib++) { 2597 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2598 a_val++; 2599 } 2600 } 2601 } 2602 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2603 for (jb=0; jb<bs; jb++) { 2604 for (ib=0; ib<bs; ib++) { 2605 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2606 b_val++; 2607 } 2608 } 2609 } 2610 } else if (type == NORM_1) { 2611 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2612 for (jb=0; jb<bs; jb++) { 2613 for (ib=0; ib<bs; ib++) { 2614 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2615 a_val++; 2616 } 2617 } 2618 } 2619 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2620 for (jb=0; jb<bs; jb++) { 2621 for (ib=0; ib<bs; ib++) { 2622 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2623 b_val++; 2624 } 2625 } 2626 } 2627 } else if (type == NORM_INFINITY) { 2628 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2629 for (jb=0; jb<bs; jb++) { 2630 for (ib=0; ib<bs; ib++) { 2631 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2632 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2633 a_val++; 2634 } 2635 } 2636 } 2637 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2638 for (jb=0; jb<bs; jb++) { 2639 for (ib=0; ib<bs; ib++) { 2640 int col = garray[b_aij->j[i]] * bs + jb; 2641 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2642 b_val++; 2643 } 2644 } 2645 } 2646 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2647 if (type == NORM_INFINITY) { 2648 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2649 } else { 2650 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2651 } 2652 ierr = PetscFree(work);CHKERRQ(ierr); 2653 if (type == NORM_2) { 2654 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2655 } 2656 PetscFunctionReturn(0); 2657 } 2658 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2661 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2662 { 2663 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2664 PetscErrorCode ierr; 2665 2666 PetscFunctionBegin; 2667 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 #undef __FUNCT__ 2672 #define __FUNCT__ "MatShift_MPIBAIJ" 2673 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2674 { 2675 PetscErrorCode ierr; 2676 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2677 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2678 2679 PetscFunctionBegin; 2680 if (!Y->preallocated) { 2681 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2682 } else if (!aij->nz) { 2683 PetscInt nonew = aij->nonew; 2684 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2685 aij->nonew = nonew; 2686 } 2687 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 /* -------------------------------------------------------------------*/ 2692 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2693 MatGetRow_MPIBAIJ, 2694 MatRestoreRow_MPIBAIJ, 2695 MatMult_MPIBAIJ, 2696 /* 4*/ MatMultAdd_MPIBAIJ, 2697 MatMultTranspose_MPIBAIJ, 2698 MatMultTransposeAdd_MPIBAIJ, 2699 0, 2700 0, 2701 0, 2702 /*10*/ 0, 2703 0, 2704 0, 2705 MatSOR_MPIBAIJ, 2706 MatTranspose_MPIBAIJ, 2707 /*15*/ MatGetInfo_MPIBAIJ, 2708 MatEqual_MPIBAIJ, 2709 MatGetDiagonal_MPIBAIJ, 2710 MatDiagonalScale_MPIBAIJ, 2711 MatNorm_MPIBAIJ, 2712 /*20*/ MatAssemblyBegin_MPIBAIJ, 2713 MatAssemblyEnd_MPIBAIJ, 2714 MatSetOption_MPIBAIJ, 2715 MatZeroEntries_MPIBAIJ, 2716 /*24*/ MatZeroRows_MPIBAIJ, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*29*/ MatSetUp_MPIBAIJ, 2722 0, 2723 0, 2724 0, 2725 0, 2726 /*34*/ MatDuplicate_MPIBAIJ, 2727 0, 2728 0, 2729 0, 2730 0, 2731 /*39*/ MatAXPY_MPIBAIJ, 2732 MatGetSubMatrices_MPIBAIJ, 2733 MatIncreaseOverlap_MPIBAIJ, 2734 MatGetValues_MPIBAIJ, 2735 MatCopy_MPIBAIJ, 2736 /*44*/ 0, 2737 MatScale_MPIBAIJ, 2738 MatShift_MPIBAIJ, 2739 0, 2740 MatZeroRowsColumns_MPIBAIJ, 2741 /*49*/ 0, 2742 0, 2743 0, 2744 0, 2745 0, 2746 /*54*/ MatFDColoringCreate_MPIXAIJ, 2747 0, 2748 MatSetUnfactored_MPIBAIJ, 2749 MatPermute_MPIBAIJ, 2750 MatSetValuesBlocked_MPIBAIJ, 2751 /*59*/ MatGetSubMatrix_MPIBAIJ, 2752 MatDestroy_MPIBAIJ, 2753 MatView_MPIBAIJ, 2754 0, 2755 0, 2756 /*64*/ 0, 2757 0, 2758 0, 2759 0, 2760 0, 2761 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2762 0, 2763 0, 2764 0, 2765 0, 2766 /*74*/ 0, 2767 MatFDColoringApply_BAIJ, 2768 0, 2769 0, 2770 0, 2771 /*79*/ 0, 2772 0, 2773 0, 2774 0, 2775 MatLoad_MPIBAIJ, 2776 /*84*/ 0, 2777 0, 2778 0, 2779 0, 2780 0, 2781 /*89*/ 0, 2782 0, 2783 0, 2784 0, 2785 0, 2786 /*94*/ 0, 2787 0, 2788 0, 2789 0, 2790 0, 2791 /*99*/ 0, 2792 0, 2793 0, 2794 0, 2795 0, 2796 /*104*/0, 2797 MatRealPart_MPIBAIJ, 2798 MatImaginaryPart_MPIBAIJ, 2799 0, 2800 0, 2801 /*109*/0, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2807 0, 2808 MatGetGhosts_MPIBAIJ, 2809 0, 2810 0, 2811 /*119*/0, 2812 0, 2813 0, 2814 0, 2815 MatGetMultiProcBlock_MPIBAIJ, 2816 /*124*/0, 2817 MatGetColumnNorms_MPIBAIJ, 2818 MatInvertBlockDiagonal_MPIBAIJ, 2819 0, 2820 0, 2821 /*129*/ 0, 2822 0, 2823 0, 2824 0, 2825 0, 2826 /*134*/ 0, 2827 0, 2828 0, 2829 0, 2830 0, 2831 /*139*/ 0, 2832 0, 2833 0, 2834 MatFDColoringSetUp_MPIXAIJ, 2835 0, 2836 
/*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2837 }; 2838 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2841 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2842 { 2843 PetscFunctionBegin; 2844 *a = ((Mat_MPIBAIJ*)A->data)->A; 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2852 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2853 { 2854 PetscInt m,rstart,cstart,cend; 2855 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2856 const PetscInt *JJ =0; 2857 PetscScalar *values=0; 2858 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2859 PetscErrorCode ierr; 2860 2861 PetscFunctionBegin; 2862 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2863 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2864 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2865 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2866 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2867 m = B->rmap->n/bs; 2868 rstart = B->rmap->rstart/bs; 2869 cstart = B->cmap->rstart/bs; 2870 cend = B->cmap->rend/bs; 2871 2872 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2873 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2874 for (i=0; i<m; i++) { 2875 nz = ii[i+1] - ii[i]; 2876 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2877 nz_max = PetscMax(nz_max,nz); 2878 JJ = jj + ii[i]; 2879 for (j=0; j<nz; j++) { 2880 if (*JJ >= cstart) break; 2881 JJ++; 2882 } 2883 d = 0; 2884 for (; j<nz; j++) { 2885 if (*JJ++ >= cend) break; 2886 d++; 2887 } 2888 d_nnz[i] = d; 2889 o_nnz[i] = nz - d; 2890 } 2891 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2892 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2893 2894 values = (PetscScalar*)V; 2895 if (!values) { 2896 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2897 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2898 } 2899 for (i=0; i<m; i++) { 2900 PetscInt row = i + rstart; 2901 PetscInt ncols = ii[i+1] - ii[i]; 2902 const PetscInt *icols = jj + ii[i]; 2903 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2904 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2905 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2906 } else { /* block ordering does not match so we can only insert one block at a time. */ 2907 PetscInt j; 2908 for (j=0; j<ncols; j++) { 2909 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2910 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2911 } 2912 } 2913 } 2914 2915 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2916 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2917 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2919 PetscFunctionReturn(0); 2920 } 2921 2922 #undef __FUNCT__ 2923 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2924 /*@C 2925 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2926 (the default parallel PETSc format). 2927 2928 Collective on MPI_Comm 2929 2930 Input Parameters: 2931 + B - the matrix 2932 . bs - the block size 2933 . i - the indices into j for the start of each local row (starts with zero) 2934 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2935 - v - optional values in the matrix 2936 2937 Level: developer 2938 2939 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2940 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2941 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2942 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2943 block column and the second index is over columns within a block. 2944 2945 .keywords: matrix, aij, compressed row, sparse, parallel 2946 2947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2948 @*/ 2949 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2950 { 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2955 PetscValidType(B,1); 2956 PetscValidLogicalCollectiveInt(B,bs,2); 2957 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2963 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2964 { 2965 Mat_MPIBAIJ *b; 2966 PetscErrorCode ierr; 2967 PetscInt i; 2968 2969 PetscFunctionBegin; 2970 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2971 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2972 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2973 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2974 2975 if (d_nnz) { 2976 for (i=0; i<B->rmap->n/bs; i++) { 2977 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2978 } 2979 } 2980 if (o_nnz) { 2981 for (i=0; i<B->rmap->n/bs; i++) { 2982 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2983 } 2984 } 2985 2986 b = (Mat_MPIBAIJ*)B->data; 2987 b->bs2 = bs*bs; 2988 b->mbs = B->rmap->n/bs; 2989 b->nbs = B->cmap->n/bs; 2990 b->Mbs = B->rmap->N/bs; 2991 b->Nbs = B->cmap->N/bs; 2992 2993 for (i=0; i<=b->size; i++) { 2994 
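    /* rangebs[] caches the block-row ownership ranges, i.e. rmap->range[] divided by the block size */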
b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if the matrix has a diagonal entry in this row */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  ierr =
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3081 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3082 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3083 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3084 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3085 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3086 b = (Mat_MPIAIJ*) B->data; 3087 3088 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3089 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3090 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3091 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3092 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3093 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3097 if (reuse == MAT_REUSE_MATRIX) { 3098 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3099 } else { 3100 *newmat = B; 3101 } 3102 PetscFunctionReturn(0); 3103 } 3104 3105 /*MC 3106 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3107 3108 Options Database Keys: 3109 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3110 . -mat_block_size <bs> - set the blocksize used to store the matrix 3111 - -mat_use_hash_table <fact> 3112 3113 Level: beginner 3114 3115 .seealso: MatCreateMPIBAIJ 3116 M*/ 3117 3118 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3119 3120 #undef __FUNCT__ 3121 #define __FUNCT__ "MatCreate_MPIBAIJ" 3122 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3123 { 3124 Mat_MPIBAIJ *b; 3125 PetscErrorCode ierr; 3126 PetscBool flg = PETSC_FALSE; 3127 3128 PetscFunctionBegin; 3129 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3130 B->data = (void*)b; 3131 3132 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3133 B->assembled = PETSC_FALSE; 3134 3135 B->insertmode = NOT_SET_VALUES; 3136 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3137 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3138 3139 /* build local table of row and column ownerships */ 3140 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3141 3142 /* build cache for off array entries formed */ 3143 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3144 3145 b->donotstash = PETSC_FALSE; 3146 b->colmap = NULL; 3147 b->garray = NULL; 3148 b->roworiented = PETSC_TRUE; 3149 3150 /* stuff used in block assembly */ 3151 b->barray = 0; 3152 3153 /* stuff used for matrix vector multiply */ 3154 b->lvec = 0; 3155 b->Mvctx = 0; 3156 3157 /* stuff for MatGetRow() */ 3158 b->rowindices = 0; 3159 b->rowvalues = 0; 3160 b->getrowactive = PETSC_FALSE; 3161 3162 /* hash table stuff */ 3163 b->ht = 0; 3164 b->hd = 0; 3165 b->ht_size = 0; 3166 b->ht_flag = PETSC_FALSE; 3167 b->ht_fact = 0; 3168 b->ht_total_ct = 0; 3169 b->ht_insert_ct = 0; 3170 3171 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3172 b->ijonly = PETSC_FALSE; 3173 3174 3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3176 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3186 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3187 3188 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3189 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 3190 if (flg) { 3191 PetscReal fact = 1.39; 3192 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3193 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3194 if (fact <= 1.0) fact = 1.39; 3195 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3196 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3197 } 3198 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 3202 /*MC 3203 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3204 3205 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3206 and MATMPIBAIJ otherwise. 3207 3208 Options Database Keys: 3209 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3210 3211 Level: beginner 3212 3213 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3214 M*/ 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3218 /*@C 3219 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3220 (block compressed row). For good matrix assembly performance 3221 the user should preallocate the matrix storage by setting the parameters 3222 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3223 performance can be increased by more than a factor of 50. 3224 3225 Collective on Mat 3226 3227 Input Parameters: 3228 + B - the matrix 3229 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3230 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3231 . 
d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   you can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
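
   Example usage (a minimal sketch only; the communicator, block size, and per-block-row
   counts below are illustrative and not taken from any particular application; error
   checking is omitted):

.vb
   Mat      A;
   PetscInt bs = 2, nlocalblockrows = 10, i;
   PetscInt d_nnz[10], o_nnz[10];

   for (i=0; i<nlocalblockrows; i++) { d_nnz[i] = 3; o_nnz[i] = 1; }  /* block nonzeros per block row */
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,nlocalblockrows*bs,nlocalblockrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
   /* ... set values with MatSetValuesBlocked()/MatSetValues(), then assemble ... */
.ve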

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
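
   Example usage (a sketch; the block size, local sizes, and nonzero estimates are made-up
   values, and error checking is omitted):

.vb
   Mat A;
   /* bs = 3, 8 local block rows and block columns, roughly 5 diagonal and 2 off-diagonal
      block nonzeros per block row */
   MatCreateBAIJ(PETSC_COMM_WORLD,3,8*3,8*3,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);
.ve

   As recommended above, the MatCreate(), MatSetType(), MatMPIBAIJSetPreallocation() sequence
   is the preferred way to obtain the same matrix.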
3404 3405 Level: intermediate 3406 3407 .keywords: matrix, block, aij, compressed row, sparse, parallel 3408 3409 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3410 @*/ 3411 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3412 { 3413 PetscErrorCode ierr; 3414 PetscMPIInt size; 3415 3416 PetscFunctionBegin; 3417 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3418 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3420 if (size > 1) { 3421 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3422 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3423 } else { 3424 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3425 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3426 } 3427 PetscFunctionReturn(0); 3428 } 3429 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3432 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3433 { 3434 Mat mat; 3435 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3436 PetscErrorCode ierr; 3437 PetscInt len=0; 3438 3439 PetscFunctionBegin; 3440 *newmat = 0; 3441 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3442 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3443 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3444 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3445 3446 mat->factortype = matin->factortype; 3447 mat->preallocated = PETSC_TRUE; 3448 mat->assembled = PETSC_TRUE; 3449 mat->insertmode = NOT_SET_VALUES; 3450 3451 a = (Mat_MPIBAIJ*)mat->data; 3452 mat->rmap->bs = matin->rmap->bs; 3453 a->bs2 = oldmat->bs2; 3454 a->mbs = oldmat->mbs; 3455 a->nbs = oldmat->nbs; 3456 a->Mbs = oldmat->Mbs; 3457 a->Nbs = oldmat->Nbs; 3458 3459 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3460 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3461 3462 a->size = oldmat->size; 3463 a->rank = oldmat->rank; 3464 a->donotstash = oldmat->donotstash; 3465 a->roworiented = oldmat->roworiented; 3466 a->rowindices = 0; 3467 a->rowvalues = 0; 3468 a->getrowactive = PETSC_FALSE; 3469 a->barray = 0; 3470 a->rstartbs = oldmat->rstartbs; 3471 a->rendbs = oldmat->rendbs; 3472 a->cstartbs = oldmat->cstartbs; 3473 a->cendbs = oldmat->cendbs; 3474 3475 /* hash table stuff */ 3476 a->ht = 0; 3477 a->hd = 0; 3478 a->ht_size = 0; 3479 a->ht_flag = oldmat->ht_flag; 3480 a->ht_fact = oldmat->ht_fact; 3481 a->ht_total_ct = 0; 3482 a->ht_insert_ct = 0; 3483 3484 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3485 if (oldmat->colmap) { 3486 #if defined(PETSC_USE_CTABLE) 3487 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3488 #else 3489 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3490 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3491 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3492 #endif 3493 } else a->colmap = 0; 3494 3495 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3496 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3497 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3498 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3499 } else a->garray = 0; 3500 3501 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3502 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3503 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3504 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3505 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3506 3507 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3508 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3509 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3510 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3511 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3512 *newmat = mat; 3513 PetscFunctionReturn(0); 3514 } 3515 3516 #undef __FUNCT__ 3517 #define __FUNCT__ "MatLoad_MPIBAIJ" 3518 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3519 { 3520 PetscErrorCode ierr; 3521 int fd; 3522 PetscInt i,nz,j,rstart,rend; 3523 PetscScalar *vals,*buf; 3524 MPI_Comm comm; 3525 MPI_Status status; 3526 PetscMPIInt rank,size,maxnz; 3527 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3528 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3529 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3530 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3531 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3532 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3533 3534 PetscFunctionBegin; 3535 /* force binary viewer to load .info file if it has not yet done so */ 3536 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3537 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3538 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3539 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3540 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3541 if (bs < 0) bs = 1; 3542 3543 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3544 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3545 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3546 if (!rank) { 3547 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3548 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3549 } 3550 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3551 M = header[1]; N = header[2]; 3552 3553 /* If global sizes are set, check if they are consistent with that given in the file */ 3554 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 3555 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 3556 3557 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3558 3559 /* 3560 This code adds extra rows to make sure the number of rows is 3561 divisible by 
the blocksize 3562 */ 3563 Mbs = M/bs; 3564 extra_rows = bs - M + bs*Mbs; 3565 if (extra_rows == bs) extra_rows = 0; 3566 else Mbs++; 3567 if (extra_rows && !rank) { 3568 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3569 } 3570 3571 /* determine ownership of all rows */ 3572 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3573 mbs = Mbs/size + ((Mbs % size) > rank); 3574 m = mbs*bs; 3575 } else { /* User set */ 3576 m = newmat->rmap->n; 3577 mbs = m/bs; 3578 } 3579 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3580 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3581 3582 /* process 0 needs enough room for process with most rows */ 3583 if (!rank) { 3584 mmax = rowners[1]; 3585 for (i=2; i<=size; i++) { 3586 mmax = PetscMax(mmax,rowners[i]); 3587 } 3588 mmax*=bs; 3589 } else mmax = -1; /* unused, but compiler warns anyway */ 3590 3591 rowners[0] = 0; 3592 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3593 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3594 rstart = rowners[rank]; 3595 rend = rowners[rank+1]; 3596 3597 /* distribute row lengths to all processors */ 3598 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3599 if (!rank) { 3600 mend = m; 3601 if (size == 1) mend = mend - extra_rows; 3602 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3603 for (j=mend; j<m; j++) locrowlens[j] = 1; 3604 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3605 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3606 for (j=0; j<m; j++) { 3607 procsnz[0] += locrowlens[j]; 3608 } 3609 for (i=1; i<size; i++) { 3610 mend = browners[i+1] - browners[i]; 3611 if (i == size-1) mend = mend - extra_rows; 3612 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3613 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3614 /* calculate the number of nonzeros on each processor */ 3615 for (j=0; j<browners[i+1]-browners[i]; j++) { 3616 procsnz[i] += rowlengths[j]; 3617 } 3618 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3619 } 3620 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3621 } else { 3622 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3623 } 3624 3625 if (!rank) { 3626 /* determine max buffer needed and allocate it */ 3627 maxnz = procsnz[0]; 3628 for (i=1; i<size; i++) { 3629 maxnz = PetscMax(maxnz,procsnz[i]); 3630 } 3631 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3632 3633 /* read in my part of the matrix column indices */ 3634 nz = procsnz[0]; 3635 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3636 mycols = ibuf; 3637 if (size == 1) nz -= extra_rows; 3638 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3639 if (size == 1) { 3640 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3641 } 3642 3643 /* read in every ones (except the last) and ship off */ 3644 for (i=1; i<size-1; i++) { 3645 nz = procsnz[i]; 3646 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3647 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3648 } 3649 /* read in the stuff for the last proc */ 3650 if (size != 1) { 3651 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3652 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3653 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3654 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3655 } 3656 ierr = PetscFree(cols);CHKERRQ(ierr); 3657 } else { 3658 /* determine 
buffer space needed for message */ 3659 nz = 0; 3660 for (i=0; i<m; i++) { 3661 nz += locrowlens[i]; 3662 } 3663 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3664 mycols = ibuf; 3665 /* receive message of column indices*/ 3666 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3667 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3668 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3669 } 3670 3671 /* loop over local rows, determining number of off diagonal entries */ 3672 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3673 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3674 rowcount = 0; nzcount = 0; 3675 for (i=0; i<mbs; i++) { 3676 dcount = 0; 3677 odcount = 0; 3678 for (j=0; j<bs; j++) { 3679 kmax = locrowlens[rowcount]; 3680 for (k=0; k<kmax; k++) { 3681 tmp = mycols[nzcount++]/bs; 3682 if (!mask[tmp]) { 3683 mask[tmp] = 1; 3684 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3685 else masked1[dcount++] = tmp; 3686 } 3687 } 3688 rowcount++; 3689 } 3690 3691 dlens[i] = dcount; 3692 odlens[i] = odcount; 3693 3694 /* zero out the mask elements we set */ 3695 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3696 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3697 } 3698 3699 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3700 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3701 3702 if (!rank) { 3703 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3704 /* read in my part of the matrix numerical values */ 3705 nz = procsnz[0]; 3706 vals = buf; 3707 mycols = ibuf; 3708 if (size == 1) nz -= extra_rows; 3709 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3710 if (size == 1) { 3711 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3712 } 3713 3714 /* insert into matrix */ 3715 jj = rstart*bs; 3716 for (i=0; i<m; i++) { 3717 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3718 mycols += locrowlens[i]; 3719 vals += locrowlens[i]; 3720 jj++; 3721 } 3722 /* read in other processors (except the last one) and ship out */ 3723 for (i=1; i<size-1; i++) { 3724 nz = procsnz[i]; 3725 vals = buf; 3726 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3727 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3728 } 3729 /* the last proc */ 3730 if (size != 1) { 3731 nz = procsnz[i] - extra_rows; 3732 vals = buf; 3733 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3734 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3735 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3736 } 3737 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3738 } else { 3739 /* receive numeric values */ 3740 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3741 3742 /* receive message of values*/ 3743 vals = buf; 3744 mycols = ibuf; 3745 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3746 3747 /* insert into matrix */ 3748 jj = rstart*bs; 3749 for (i=0; i<m; i++) { 3750 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3751 mycols += locrowlens[i]; 3752 vals += locrowlens[i]; 3753 jj++; 3754 } 3755 } 3756 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3757 ierr = PetscFree(buf);CHKERRQ(ierr); 3758 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3759 ierr = 
PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
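
  For reference, the standard call that this Fortran shortcut replaces looks like the following
  sketch (the block indices and the 2x2 block of values are illustrative only):

.vb
  PetscInt    ii = 0, jj = 0;                 /* global block row and block column */
  PetscScalar vals[4] = {1.0,2.0,3.0,4.0};    /* one bs*bs block, here bs = 2 */
  MatSetValuesBlocked(mat,1,&ii,1,&jj,vals,ADD_VALUES);
.ve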
3847 3848 Level: advanced 3849 3850 .seealso: MatSetValuesBlocked() 3851 @*/ 3852 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3853 { 3854 /* convert input arguments to C version */ 3855 Mat mat = *matin; 3856 PetscInt m = *min, n = *nin; 3857 InsertMode addv = *addvin; 3858 3859 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3860 const MatScalar *value; 3861 MatScalar *barray = baij->barray; 3862 PetscBool roworiented = baij->roworiented; 3863 PetscErrorCode ierr; 3864 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3865 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3866 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3867 3868 PetscFunctionBegin; 3869 /* tasks normally handled by MatSetValuesBlocked() */ 3870 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3871 #if defined(PETSC_USE_DEBUG) 3872 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3873 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3874 #endif 3875 if (mat->assembled) { 3876 mat->was_assembled = PETSC_TRUE; 3877 mat->assembled = PETSC_FALSE; 3878 } 3879 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3880 3881 3882 if (!barray) { 3883 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3884 baij->barray = barray; 3885 } 3886 3887 if (roworiented) stepval = (n-1)*bs; 3888 else stepval = (m-1)*bs; 3889 3890 for (i=0; i<m; i++) { 3891 if (im[i] < 0) continue; 3892 #if defined(PETSC_USE_DEBUG) 3893 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3894 #endif 3895 if (im[i] >= rstart && im[i] < rend) { 3896 row = im[i] - rstart; 3897 for (j=0; j<n; j++) { 3898 /* If NumCol = 1 then a copy is not required */ 3899 if ((roworiented) && (n == 1)) { 3900 barray = (MatScalar*)v + i*bs2; 3901 } else if ((!roworiented) && (m == 1)) { 3902 barray = (MatScalar*)v + j*bs2; 3903 } else { /* Here a copy is required */ 3904 if (roworiented) { 3905 value = v + i*(stepval+bs)*bs + j*bs; 3906 } else { 3907 value = v + j*(stepval+bs)*bs + i*bs; 3908 } 3909 for (ii=0; ii<bs; ii++,value+=stepval) { 3910 for (jj=0; jj<bs; jj++) { 3911 *barray++ = *value++; 3912 } 3913 } 3914 barray -=bs2; 3915 } 3916 3917 if (in[j] >= cstart && in[j] < cend) { 3918 col = in[j] - cstart; 3919 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3920 } else if (in[j] < 0) continue; 3921 #if defined(PETSC_USE_DEBUG) 3922 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3923 #endif 3924 else { 3925 if (mat->was_assembled) { 3926 if (!baij->colmap) { 3927 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3928 } 3929 3930 #if defined(PETSC_USE_DEBUG) 3931 #if defined(PETSC_USE_CTABLE) 3932 { PetscInt data; 3933 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3934 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3935 } 3936 #else 3937 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3938 #endif 3939 #endif 3940 #if defined(PETSC_USE_CTABLE) 3941 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3942 col = (col - 1)/bs; 3943 #else 3944 col = (baij->colmap[in[j]] - 
1)/bs; 3945 #endif 3946 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3947 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3948 col = in[j]; 3949 } 3950 } else col = in[j]; 3951 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3952 } 3953 } 3954 } else { 3955 if (!baij->donotstash) { 3956 if (roworiented) { 3957 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3958 } else { 3959 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3960 } 3961 } 3962 } 3963 } 3964 3965 /* task normally handled by MatSetValuesBlocked() */ 3966 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 #undef __FUNCT__ 3971 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3972 /*@ 3973 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3974 CSR format the local rows. 3975 3976 Collective on MPI_Comm 3977 3978 Input Parameters: 3979 + comm - MPI communicator 3980 . bs - the block size, only a block size of 1 is supported 3981 . m - number of local rows (Cannot be PETSC_DECIDE) 3982 . n - This value should be the same as the local size used in creating the 3983 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3984 calculated if N is given) For square matrices n is almost always m. 3985 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3986 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3987 . i - row indices 3988 . j - column indices 3989 - a - matrix values 3990 3991 Output Parameter: 3992 . mat - the matrix 3993 3994 Level: intermediate 3995 3996 Notes: 3997 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3998 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3999 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4000 4001 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 4002 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 4003 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 4004 with column-major ordering within blocks. 4005 4006 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
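
   Example usage (a sketch for a single process with bs = 1, two rows, and three columns;
   the index and value arrays are illustrative only, and error checking is omitted):

.vb
   Mat         A;
   PetscInt    i[] = {0,2,4};                  /* row pointers for the 2 local rows */
   PetscInt    j[] = {0,1,1,2};                /* global column indices, sorted within each row */
   PetscScalar a[] = {1.0,-1.0,2.0,-2.0};

   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve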

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,sum,mbs,Nbs;
    PetscInt    *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}