#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
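/*
  Note on the two macros above (explanatory comment added here, not original source):
  scalars inside each bs x bs block are stored column-major, so a point entry (row,col)
  lands at offset bs2*_i + bs*cidx + ridx within the block row, where _i is the position
  of block column bcol = col/bs.  For example, with bs = 2 (bs2 = 4) the point entry
  (row=3,col=4) gives brow = 1, bcol = 2, ridx = 1, cidx = 0, i.e. element 1 of the
  4-entry block that holds block (1,2).
*/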

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
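/*
  Illustrative note (an added comment, not original source): for columns that are not
  owned locally, MatSetValues_MPIBAIJ() above converts the global column in[j] to a local
  column of the off-diagonal matrix B via colmap.  colmap stores "local point offset + 1"
  for each global block column present in B (0 meaning absent), so
      col = colmap[in[j]/bs] - 1 + in[j]%bs
  is the local point column used by MatSetValues_SeqBAIJ_B_Private().  If the block is
  absent and new nonzeros are allowed, the matrix is first "disassembled" so that B
  temporarily uses global column numbering again.
*/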

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool         roworiented  = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
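/*
  Worked example of the HASH() macro above (comment added for clarity): the key is
  multiplied by the golden-ratio fraction 0.6180339887 and the fractional part of the
  product is scaled by the table size.  E.g. for size = 100 and key = 57,
  57*0.6180339887 = 35.227..., fractional part 0.227..., so HASH(100,57,tmp) = 22.
  This spreads the keys row*Nbs + col + 1 fairly uniformly over the table.
*/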
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
      if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
    }
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
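/*
  Summary of the norms computed above (explanatory comment, not original source):
  NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2), accumulated locally over the A and B parts and
  combined with an MPI sum; NORM_1 is the maximum column sum max_j sum_i |a_ij|, which
  needs a length-N reduction of per-column sums; NORM_INFINITY is the maximum row sum
  max_i sum_j |a_ij|, which only needs an MPI max of the local row sums.
*/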

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
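/*
  Note on the table built above (added comment): each occupied slot holds the key
  row*Nbs + col + 1 (block indices) and a pointer to the corresponding bs2 stored values,
  with collisions resolved by linear probing, i.e. slots (h1+k)%ht_size for k = 0,1,...
  are tried until a free one is found.  A lookup in MatSetValues_MPIBAIJ_HT() probes
  forward from HASH(key), wraps to the start of the table, and errors out only if it
  comes back to h1 without finding the key; this is why the table must be destroyed and
  rebuilt whenever new nonzero blocks are added to the matrix.
*/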

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
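/*
  Layout written by MatView_MPIBAIJ_Binary() above (descriptive comment, not original
  source): a 4-entry header (MAT_FILE_CLASSID, M, N, total number of nonzeros), then one
  row length per global point row, then the global column indices of every nonzero in
  row order, then the matching scalar values.  Within each row the off-diagonal (B)
  columns to the left of the diagonal block are written first, then the diagonal (A)
  columns shifted by cs, then the remaining B columns, so columns appear in increasing
  global order.
*/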

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
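/*
  Pattern used by the four multiply routines above (explanatory comment): the scatter of
  the needed off-process entries of xx into a->lvec is started, the purely local product
  with the diagonal block a->A is computed while the messages are in flight, and after
  VecScatterEnd() the off-diagonal block contributes a->B * lvec via a multiply-add.
  The transpose versions run the same scatter in reverse with ADD_VALUES to accumulate
  remote contributions into the result.
*/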

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
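/*
  Note on MatDiagonalScale_MPIBAIJ() above (added comment): it forms diag(ll)*mat*diag(rr).
  The left scaling only needs the locally owned part of ll, so both A and B are scaled by
  ll directly; the right scaling of B needs entries of rr that live on other processes,
  so rr is scattered into baij->lvec first and the scatter is overlapped with scaling the
  diagonal block.
*/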
If the "diagonal block" of the matrix 1639 is square and the user wishes to set the diagonal we use separate 1640 code so that MatSetValues() is not called for each diagonal allocating 1641 new memory, thus calling lots of mallocs and slowing things down. 1642 1643 */ 1644 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1645 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1646 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1647 if ((diag != 0.0) && cong) { 1648 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1649 } else if (diag != 0.0) { 1650 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1651 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1652 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1653 for (r = 0; r < len; ++r) { 1654 const PetscInt row = lrows[r] + A->rmap->rstart; 1655 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1656 } 1657 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 } else { 1660 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1661 } 1662 ierr = PetscFree(lrows);CHKERRQ(ierr); 1663 1664 /* only change matrix nonzero state if pattern was allowed to be changed */ 1665 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1666 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1667 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1668 } 1669 PetscFunctionReturn(0); 1670 } 1671 1672 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1673 { 1674 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1675 PetscErrorCode ierr; 1676 PetscMPIInt n = A->rmap->n,p = 0; 1677 PetscInt i,j,k,r,len = 0,row,col,count; 1678 PetscInt *lrows,*owners = A->rmap->range; 1679 PetscSFNode *rrows; 1680 PetscSF sf; 1681 const PetscScalar *xx; 1682 PetscScalar *bb,*mask; 1683 Vec xmask,lmask; 1684 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1685 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1686 PetscScalar *aa; 1687 1688 PetscFunctionBegin; 1689 /* Create SF where leaves are input rows and roots are owned rows */ 1690 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1691 for (r = 0; r < n; ++r) lrows[r] = -1; 1692 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1693 for (r = 0; r < N; ++r) { 1694 const PetscInt idx = rows[r]; 1695 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1696 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1697 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1698 } 1699 rrows[r].rank = p; 1700 rrows[r].index = rows[r] - owners[p]; 1701 } 1702 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1703 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1704 /* Collect flags for rows to be zeroed */ 1705 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1706 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1707 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1708 /* Compress 
and put in row numbers */ 1709 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1710 /* zero diagonal part of matrix */ 1711 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1712 /* handle off diagonal part of matrix */ 1713 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1714 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1715 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1716 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1717 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1718 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1719 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1720 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1721 if (x) { 1722 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1725 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1726 } 1727 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1728 /* remove zeroed rows of off diagonal matrix */ 1729 for (i = 0; i < len; ++i) { 1730 row = lrows[i]; 1731 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1732 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1733 for (k = 0; k < count; ++k) { 1734 aa[0] = 0.0; 1735 aa += bs; 1736 } 1737 } 1738 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1739 for (i = 0; i < l->B->rmap->N; ++i) { 1740 row = i/bs; 1741 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1742 for (k = 0; k < bs; ++k) { 1743 col = bs*baij->j[j] + k; 1744 if (PetscAbsScalar(mask[col])) { 1745 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1746 if (x) bb[i] -= aa[0]*xx[col]; 1747 aa[0] = 0.0; 1748 } 1749 } 1750 } 1751 } 1752 if (x) { 1753 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1754 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1755 } 1756 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1757 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1758 ierr = PetscFree(lrows);CHKERRQ(ierr); 1759 1760 /* only change matrix nonzero state if pattern was allowed to be changed */ 1761 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1762 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1763 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1769 { 1770 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1779 1780 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1781 { 1782 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1783 Mat a,b,c,d; 1784 PetscBool flg; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 a = matA->A; b = matA->B; 1789 c = matB->A; d = matB->B; 1790 1791 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1792 if (flg) { 1793 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1794 } 1795 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1800 { 1801 PetscErrorCode ierr; 1802 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1803 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1804 1805 PetscFunctionBegin; 1806 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1807 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1808 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1809 } else { 1810 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1811 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1812 } 1813 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1818 { 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1827 { 1828 PetscErrorCode ierr; 1829 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1830 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1831 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1832 1833 PetscFunctionBegin; 1834 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1839 { 1840 PetscErrorCode ierr; 1841 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1842 PetscBLASInt bnz,one=1; 1843 Mat_SeqBAIJ *x,*y; 1844 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1845 1846 PetscFunctionBegin; 1847 if (str == SAME_NONZERO_PATTERN) { 1848 PetscScalar alpha = a; 1849 x = (Mat_SeqBAIJ*)xx->A->data; 1850 y = (Mat_SeqBAIJ*)yy->A->data; 1851 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1852 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1853 x = (Mat_SeqBAIJ*)xx->B->data; 1854 y = (Mat_SeqBAIJ*)yy->B->data; 1855 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1856 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1857 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1858 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1859 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1860 } else { 1861 Mat B; 1862 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1863 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1864 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1865 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1866 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1867 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1868 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1869 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1870 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1871 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1872 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1873 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1874 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1875 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1876 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1877 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1878 } 1879 PetscFunctionReturn(0); 1880 } 1881 1882 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1883 { 1884 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1889 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1894 { 1895 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1900 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1905 { 1906 PetscErrorCode ierr; 1907 IS iscol_local; 1908 PetscInt csize; 1909 1910 PetscFunctionBegin; 1911 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1912 if (call == MAT_REUSE_MATRIX) { 1913 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1914 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1915 } else { 1916 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1917 } 1918 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1919 if (call == MAT_INITIAL_MATRIX) { 1920 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1921 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 /* 1927 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1928 in local and then by concatenating the local matrices the end result. 1929 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1930 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1931 */ 1932 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1933 { 1934 PetscErrorCode ierr; 1935 PetscMPIInt rank,size; 1936 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1937 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1938 Mat M,Mreuse; 1939 MatScalar *vwork,*aa; 1940 MPI_Comm comm; 1941 IS isrow_new, iscol_new; 1942 Mat_SeqBAIJ *aij; 1943 1944 PetscFunctionBegin; 1945 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1946 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1947 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1948 /* The compression and expansion should be avoided. 
It does not report
1949 errors, might change the indices, and hence is buggy. */
1950 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1951 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1952
1953 if (call == MAT_REUSE_MATRIX) {
1954 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1955 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1956 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1957 } else {
1958 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1959 }
1960 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1961 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1962 /*
1963 m - number of local rows
1964 n - number of columns (same on all processors)
1965 rstart - first row in new global matrix generated
1966 */
1967 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1968 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1969 m = m/bs;
1970 n = n/bs;
1971
1972 if (call == MAT_INITIAL_MATRIX) {
1973 aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1974 ii = aij->i;
1975 jj = aij->j;
1976
1977 /*
1978 Determine the number of non-zeros in the diagonal and off-diagonal
1979 portions of the matrix in order to do correct preallocation
1980 */
1981
1982 /* first get start and end of "diagonal" columns */
1983 if (csize == PETSC_DECIDE) {
1984 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1985 if (mglobal == n*bs) { /* square matrix */
1986 nlocal = m;
1987 } else {
1988 nlocal = n/size + ((n % size) > rank);
1989 }
1990 } else {
1991 nlocal = csize/bs;
1992 }
1993 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1994 rstart = rend - nlocal;
1995 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1996
1997 /* next, compute all the lengths */
1998 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
1999 for (i=0; i<m; i++) {
2000 jend = ii[i+1] - ii[i];
2001 olen = 0;
2002 dlen = 0;
2003 for (j=0; j<jend; j++) {
2004 if (*jj < rstart || *jj >= rend) olen++;
2005 else dlen++;
2006 jj++;
2007 }
2008 olens[i] = olen;
2009 dlens[i] = dlen;
2010 }
2011 ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2012 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2013 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2014 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2015 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2016 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2017 } else {
2018 PetscInt ml,nl;
2019
2020 M = *newmat;
2021 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2022 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2023 ierr = MatZeroEntries(M);CHKERRQ(ierr);
2024 /*
2025 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2026 rather than the slower MatSetValues().
2027 */ 2028 M->was_assembled = PETSC_TRUE; 2029 M->assembled = PETSC_FALSE; 2030 } 2031 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2032 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2033 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2034 ii = aij->i; 2035 jj = aij->j; 2036 aa = aij->a; 2037 for (i=0; i<m; i++) { 2038 row = rstart/bs + i; 2039 nz = ii[i+1] - ii[i]; 2040 cwork = jj; jj += nz; 2041 vwork = aa; aa += nz*bs*bs; 2042 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2043 } 2044 2045 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2046 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2047 *newmat = M; 2048 2049 /* save submatrix used in processor for next request */ 2050 if (call == MAT_INITIAL_MATRIX) { 2051 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2052 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2053 } 2054 PetscFunctionReturn(0); 2055 } 2056 2057 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2058 { 2059 MPI_Comm comm,pcomm; 2060 PetscInt clocal_size,nrows; 2061 const PetscInt *rows; 2062 PetscMPIInt size; 2063 IS crowp,lcolp; 2064 PetscErrorCode ierr; 2065 2066 PetscFunctionBegin; 2067 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2068 /* make a collective version of 'rowp' */ 2069 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2070 if (pcomm==comm) { 2071 crowp = rowp; 2072 } else { 2073 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2074 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2075 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2076 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2077 } 2078 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2079 /* make a local version of 'colp' */ 2080 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2081 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2082 if (size==1) { 2083 lcolp = colp; 2084 } else { 2085 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2086 } 2087 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2088 /* now we just get the submatrix */ 2089 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2090 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2091 /* clean up */ 2092 if (pcomm!=comm) { 2093 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2094 } 2095 if (size>1) { 2096 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2097 } 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2102 { 2103 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2104 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2105 2106 PetscFunctionBegin; 2107 if (nghosts) *nghosts = B->nbs; 2108 if (ghosts) *ghosts = baij->garray; 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2113 { 2114 Mat B; 2115 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2116 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2117 Mat_SeqAIJ *b; 2118 PetscErrorCode ierr; 2119 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2120 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2121 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2122 2123 PetscFunctionBegin; 2124 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2125 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2126 2127 /* ---------------------------------------------------------------- 2128 Tell every processor the number of nonzeros per row 2129 */ 2130 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2131 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2132 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2133 } 2134 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2135 displs = recvcounts + size; 2136 for (i=0; i<size; i++) { 2137 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2138 displs[i] = A->rmap->range[i]/bs; 2139 } 2140 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2141 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2142 #else 2143 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2144 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2145 #endif 2146 /* --------------------------------------------------------------- 2147 Create the sequential matrix of the same type as the local block diagonal 2148 */ 2149 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2150 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2151 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2152 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2153 b = (Mat_SeqAIJ*)B->data; 2154 2155 /*-------------------------------------------------------------------- 2156 Copy my part of matrix column indices over 2157 */ 2158 sendcount = ad->nz + bd->nz; 2159 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2160 a_jsendbuf = ad->j; 2161 b_jsendbuf = bd->j; 2162 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2163 cnt = 0; 2164 for (i=0; i<n; i++) { 2165 2166 /* put in lower diagonal portion */ 2167 m = bd->i[i+1] - bd->i[i]; 2168 while (m > 0) { 2169 /* is it above diagonal (in bd (compressed) numbering) */ 2170 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2171 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2172 m--; 2173 } 2174 2175 /* put in diagonal portion */ 2176 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2177 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2178 } 2179 2180 /* put in upper diagonal portion */ 2181 while (m-- > 0) { 2182 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2183 } 2184 } 2185 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2186 2187 /*-------------------------------------------------------------------- 2188 Gather all column indices to all processors 2189 */ 2190 for (i=0; i<size; i++) { 2191 recvcounts[i] = 0; 2192 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2193 recvcounts[i] += lens[j]; 2194 } 2195 } 2196 displs[0] = 0; 2197 for (i=1; i<size; i++) { 2198 displs[i] = displs[i-1] + recvcounts[i-1]; 2199 } 2200 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2201 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2202 #else 2203 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2204 #endif 2205 /*-------------------------------------------------------------------- 2206 Assemble the matrix into useable form (note numerical values not yet set) 2207 */ 
2208 /* set the b->ilen (length of each row) values */ 2209 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2210 /* set the b->i indices */ 2211 b->i[0] = 0; 2212 for (i=1; i<=A->rmap->N/bs; i++) { 2213 b->i[i] = b->i[i-1] + lens[i-1]; 2214 } 2215 ierr = PetscFree(lens);CHKERRQ(ierr); 2216 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2217 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2218 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2219 2220 if (A->symmetric) { 2221 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2222 } else if (A->hermitian) { 2223 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2224 } else if (A->structurally_symmetric) { 2225 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2226 } 2227 *newmat = B; 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2232 { 2233 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2234 PetscErrorCode ierr; 2235 Vec bb1 = 0; 2236 2237 PetscFunctionBegin; 2238 if (flag == SOR_APPLY_UPPER) { 2239 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2240 PetscFunctionReturn(0); 2241 } 2242 2243 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2244 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2245 } 2246 2247 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2248 if (flag & SOR_ZERO_INITIAL_GUESS) { 2249 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2250 its--; 2251 } 2252 2253 while (its--) { 2254 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2255 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2256 2257 /* update rhs: bb1 = bb - B*x */ 2258 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2259 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2260 2261 /* local sweep */ 2262 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2263 } 2264 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2265 if (flag & SOR_ZERO_INITIAL_GUESS) { 2266 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2267 its--; 2268 } 2269 while (its--) { 2270 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2271 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 2273 /* update rhs: bb1 = bb - B*x */ 2274 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2275 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2276 2277 /* local sweep */ 2278 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2279 } 2280 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2281 if (flag & SOR_ZERO_INITIAL_GUESS) { 2282 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2283 its--; 2284 } 2285 while (its--) { 2286 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 2289 /* update rhs: bb1 = bb - B*x */ 2290 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2291 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2292 2293 /* local sweep */ 2294 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2295 } 2296 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2297 2298 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2303 { 2304 PetscErrorCode ierr; 2305 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2306 PetscInt N,i,*garray = aij->garray; 2307 PetscInt ib,jb,bs = A->rmap->bs; 2308 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2309 MatScalar *a_val = a_aij->a; 2310 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2311 MatScalar *b_val = b_aij->a; 2312 PetscReal *work; 2313 2314 PetscFunctionBegin; 2315 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2316 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2317 if (type == NORM_2) { 2318 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2319 for (jb=0; jb<bs; jb++) { 2320 for (ib=0; ib<bs; ib++) { 2321 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2327 for (jb=0; jb<bs; jb++) { 2328 for (ib=0; ib<bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == NORM_1) { 2335 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2336 for (jb=0; jb<bs; jb++) { 2337 for (ib=0; ib<bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2344 for (jb=0; jb<bs; jb++) { 2345 for (ib=0; ib<bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else if (type == NORM_INFINITY) { 2352 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2353 for (jb=0; jb<bs; jb++) { 2354 for (ib=0; ib<bs; ib++) { 2355 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2356 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2357 a_val++; 2358 } 2359 } 2360 } 2361 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2362 for (jb=0; jb<bs; jb++) { 2363 for (ib=0; ib<bs; ib++) { 2364 int col = garray[b_aij->j[i]] * bs + jb; 2365 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2366 b_val++; 2367 } 2368 } 2369 } 2370 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2371 if (type == NORM_INFINITY) { 2372 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2373 } else { 2374 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2375 } 2376 ierr = PetscFree(work);CHKERRQ(ierr); 2377 if (type == NORM_2) { 2378 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2384 { 2385 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2390 A->factorerrortype = a->A->factorerrortype; 2391 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2392 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 PetscErrorCode MatShift_MPIBAIJ(Mat 
Y,PetscScalar a) 2397 { 2398 PetscErrorCode ierr; 2399 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2400 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2401 2402 PetscFunctionBegin; 2403 if (!Y->preallocated) { 2404 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2405 } else if (!aij->nz) { 2406 PetscInt nonew = aij->nonew; 2407 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2408 aij->nonew = nonew; 2409 } 2410 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2415 { 2416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2421 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2422 if (d) { 2423 PetscInt rstart; 2424 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2425 *d += rstart/A->rmap->bs; 2426 2427 } 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2432 { 2433 PetscFunctionBegin; 2434 *a = ((Mat_MPIBAIJ*)A->data)->A; 2435 PetscFunctionReturn(0); 2436 } 2437 2438 /* -------------------------------------------------------------------*/ 2439 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2440 MatGetRow_MPIBAIJ, 2441 MatRestoreRow_MPIBAIJ, 2442 MatMult_MPIBAIJ, 2443 /* 4*/ MatMultAdd_MPIBAIJ, 2444 MatMultTranspose_MPIBAIJ, 2445 MatMultTransposeAdd_MPIBAIJ, 2446 0, 2447 0, 2448 0, 2449 /*10*/ 0, 2450 0, 2451 0, 2452 MatSOR_MPIBAIJ, 2453 MatTranspose_MPIBAIJ, 2454 /*15*/ MatGetInfo_MPIBAIJ, 2455 MatEqual_MPIBAIJ, 2456 MatGetDiagonal_MPIBAIJ, 2457 MatDiagonalScale_MPIBAIJ, 2458 MatNorm_MPIBAIJ, 2459 /*20*/ MatAssemblyBegin_MPIBAIJ, 2460 MatAssemblyEnd_MPIBAIJ, 2461 MatSetOption_MPIBAIJ, 2462 MatZeroEntries_MPIBAIJ, 2463 /*24*/ MatZeroRows_MPIBAIJ, 2464 0, 2465 0, 2466 0, 2467 0, 2468 /*29*/ MatSetUp_MPIBAIJ, 2469 0, 2470 0, 2471 MatGetDiagonalBlock_MPIBAIJ, 2472 0, 2473 /*34*/ MatDuplicate_MPIBAIJ, 2474 0, 2475 0, 2476 0, 2477 0, 2478 /*39*/ MatAXPY_MPIBAIJ, 2479 MatCreateSubMatrices_MPIBAIJ, 2480 MatIncreaseOverlap_MPIBAIJ, 2481 MatGetValues_MPIBAIJ, 2482 MatCopy_MPIBAIJ, 2483 /*44*/ 0, 2484 MatScale_MPIBAIJ, 2485 MatShift_MPIBAIJ, 2486 0, 2487 MatZeroRowsColumns_MPIBAIJ, 2488 /*49*/ 0, 2489 0, 2490 0, 2491 0, 2492 0, 2493 /*54*/ MatFDColoringCreate_MPIXAIJ, 2494 0, 2495 MatSetUnfactored_MPIBAIJ, 2496 MatPermute_MPIBAIJ, 2497 MatSetValuesBlocked_MPIBAIJ, 2498 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2499 MatDestroy_MPIBAIJ, 2500 MatView_MPIBAIJ, 2501 0, 2502 0, 2503 /*64*/ 0, 2504 0, 2505 0, 2506 0, 2507 0, 2508 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2509 0, 2510 0, 2511 0, 2512 0, 2513 /*74*/ 0, 2514 MatFDColoringApply_BAIJ, 2515 0, 2516 0, 2517 0, 2518 /*79*/ 0, 2519 0, 2520 0, 2521 0, 2522 MatLoad_MPIBAIJ, 2523 /*84*/ 0, 2524 0, 2525 0, 2526 0, 2527 0, 2528 /*89*/ 0, 2529 0, 2530 0, 2531 0, 2532 0, 2533 /*94*/ 0, 2534 0, 2535 0, 2536 0, 2537 0, 2538 /*99*/ 0, 2539 0, 2540 0, 2541 0, 2542 0, 2543 /*104*/0, 2544 MatRealPart_MPIBAIJ, 2545 MatImaginaryPart_MPIBAIJ, 2546 0, 2547 0, 2548 /*109*/0, 2549 0, 2550 0, 2551 0, 2552 MatMissingDiagonal_MPIBAIJ, 2553 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2554 0, 2555 MatGetGhosts_MPIBAIJ, 2556 0, 2557 0, 2558 /*119*/0, 2559 0, 2560 0, 2561 0, 2562 MatGetMultiProcBlock_MPIBAIJ, 2563 /*124*/0, 2564 MatGetColumnNorms_MPIBAIJ, 2565 
MatInvertBlockDiagonal_MPIBAIJ, 2566 0, 2567 0, 2568 /*129*/ 0, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*134*/ 0, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*139*/ MatSetBlockSizes_Default, 2579 0, 2580 0, 2581 MatFDColoringSetUp_MPIXAIJ, 2582 0, 2583 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2584 }; 2585 2586 2587 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2588 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2589 2590 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2591 { 2592 PetscInt m,rstart,cstart,cend; 2593 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2594 const PetscInt *JJ =0; 2595 PetscScalar *values=0; 2596 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2597 PetscErrorCode ierr; 2598 PetscBool nooffprocentries; 2599 2600 PetscFunctionBegin; 2601 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2602 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2603 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2604 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2605 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2606 m = B->rmap->n/bs; 2607 rstart = B->rmap->rstart/bs; 2608 cstart = B->cmap->rstart/bs; 2609 cend = B->cmap->rend/bs; 2610 2611 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2612 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2613 for (i=0; i<m; i++) { 2614 nz = ii[i+1] - ii[i]; 2615 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2616 nz_max = PetscMax(nz_max,nz); 2617 dlen = 0; 2618 olen = 0; 2619 JJ = jj + ii[i]; 2620 for (j=0; j<nz; j++) { 2621 if (*JJ < cstart || *JJ >= cend) olen++; 2622 else dlen++; 2623 JJ++; 2624 } 2625 d_nnz[i] = dlen; 2626 o_nnz[i] = olen; 2627 } 2628 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2629 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2630 2631 values = (PetscScalar*)V; 2632 if (!values) { 2633 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<m; i++) { 2636 PetscInt row = i + rstart; 2637 PetscInt ncols = ii[i+1] - ii[i]; 2638 const PetscInt *icols = jj + ii[i]; 2639 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2640 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2641 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2642 } else { /* block ordering does not match so we can only insert one block at a time. */ 2643 PetscInt j; 2644 for (j=0; j<ncols; j++) { 2645 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2646 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2647 } 2648 } 2649 } 2650 2651 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2652 nooffprocentries = B->nooffprocentries; 2653 B->nooffprocentries = PETSC_TRUE; 2654 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 B->nooffprocentries = nooffprocentries; 2657 2658 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 /*@C 2663 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2664 2665 Collective 2666 2667 Input Parameters: 2668 + B - the matrix 2669 . bs - the block size 2670 . i - the indices into j for the start of each local row (starts with zero) 2671 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2672 - v - optional values in the matrix 2673 2674 Level: advanced 2675 2676 Notes: 2677 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2678 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2679 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2680 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2681 block column and the second index is over columns within a block. 2682 2683 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2684 2685 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2686 @*/ 2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2688 { 2689 PetscErrorCode ierr; 2690 2691 PetscFunctionBegin; 2692 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2693 PetscValidType(B,1); 2694 PetscValidLogicalCollectiveInt(B,bs,2); 2695 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2696 PetscFunctionReturn(0); 2697 } 2698 2699 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2700 { 2701 Mat_MPIBAIJ *b; 2702 PetscErrorCode ierr; 2703 PetscInt i; 2704 PetscMPIInt size; 2705 2706 PetscFunctionBegin; 2707 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2708 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2709 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2710 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2711 2712 if (d_nnz) { 2713 for (i=0; i<B->rmap->n/bs; i++) { 2714 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2715 } 2716 } 2717 if (o_nnz) { 2718 for (i=0; i<B->rmap->n/bs; i++) { 2719 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2720 } 2721 } 2722 2723 b = (Mat_MPIBAIJ*)B->data; 2724 b->bs2 = bs*bs; 2725 b->mbs = B->rmap->n/bs; 2726 b->nbs = B->cmap->n/bs; 2727 
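/* Mbs and Nbs are the global numbers of block rows and block columns; mbs and nbs above are the local counts */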
b->Mbs = B->rmap->N/bs; 2728 b->Nbs = B->cmap->N/bs; 2729 2730 for (i=0; i<=b->size; i++) { 2731 b->rangebs[i] = B->rmap->range[i]/bs; 2732 } 2733 b->rstartbs = B->rmap->rstart/bs; 2734 b->rendbs = B->rmap->rend/bs; 2735 b->cstartbs = B->cmap->rstart/bs; 2736 b->cendbs = B->cmap->rend/bs; 2737 2738 #if defined(PETSC_USE_CTABLE) 2739 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2740 #else 2741 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2742 #endif 2743 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2744 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2745 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2746 2747 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2748 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2749 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2750 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2751 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2752 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2753 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2754 2755 if (!B->preallocated) { 2756 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2757 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2758 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2759 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2760 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2761 } 2762 2763 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2764 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2765 B->preallocated = PETSC_TRUE; 2766 B->was_assembled = PETSC_FALSE; 2767 B->assembled = PETSC_FALSE; 2768 PetscFunctionReturn(0); 2769 } 2770 2771 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2772 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2773 2774 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2775 { 2776 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2777 PetscErrorCode ierr; 2778 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2779 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2780 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2781 2782 PetscFunctionBegin; 2783 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2784 ii[0] = 0; 2785 for (i=0; i<M; i++) { 2786 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2787 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2788 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2789 /* remove one from count of matrix has diagonal */ 2790 for (j=id[i]; j<id[i+1]; j++) { 2791 if (jd[j] == i) {ii[i+1]--;break;} 2792 } 2793 } 2794 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2795 cnt = 0; 2796 for (i=0; i<M; i++) { 2797 for (j=io[i]; j<io[i+1]; j++) { 2798 if (garray[jo[j]] > rstart) break; 2799 jj[cnt++] = garray[jo[j]]; 2800 } 2801 for (k=id[i]; k<id[i+1]; k++) { 2802 if (jd[k] != i) { 2803 jj[cnt++] = rstart + jd[k]; 2804 } 2805 } 2806 for (; j<io[i+1]; j++) { 2807 jj[cnt++] = garray[jo[j]]; 2808 } 2809 } 2810 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2811 
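/* ii and jj are handed off to the new MPIAdj matrix rather than copied, so they are not freed here */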
PetscFunctionReturn(0); 2812 } 2813 2814 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2815 2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2817 2818 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2819 { 2820 PetscErrorCode ierr; 2821 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2822 Mat B; 2823 Mat_MPIAIJ *b; 2824 2825 PetscFunctionBegin; 2826 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2827 2828 if (reuse == MAT_REUSE_MATRIX) { 2829 B = *newmat; 2830 } else { 2831 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2832 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2833 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2834 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2835 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2836 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2837 } 2838 b = (Mat_MPIAIJ*) B->data; 2839 2840 if (reuse == MAT_REUSE_MATRIX) { 2841 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2842 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2843 } else { 2844 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2845 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2846 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2847 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2848 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2849 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2850 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2851 } 2852 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 2855 if (reuse == MAT_INPLACE_MATRIX) { 2856 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2857 } else { 2858 *newmat = B; 2859 } 2860 PetscFunctionReturn(0); 2861 } 2862 2863 /*MC 2864 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2865 2866 Options Database Keys: 2867 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2868 . -mat_block_size <bs> - set the blocksize used to store the matrix 2869 - -mat_use_hash_table <fact> 2870 2871 Level: beginner 2872 2873 Notes: 2874 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 2875 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2876 2877 .seealso: MatCreateBAIJ 2878 M*/ 2879 2880 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2881 2882 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2883 { 2884 Mat_MPIBAIJ *b; 2885 PetscErrorCode ierr; 2886 PetscBool flg = PETSC_FALSE; 2887 2888 PetscFunctionBegin; 2889 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2890 B->data = (void*)b; 2891 2892 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2893 B->assembled = PETSC_FALSE; 2894 2895 B->insertmode = NOT_SET_VALUES; 2896 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2897 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2898 2899 /* build local table of row and column ownerships */ 2900 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2901 2902 /* build cache for off array entries formed */ 2903 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2904 2905 b->donotstash = PETSC_FALSE; 2906 b->colmap = NULL; 2907 b->garray = NULL; 2908 b->roworiented = PETSC_TRUE; 2909 2910 /* stuff used in block assembly */ 2911 b->barray = 0; 2912 2913 /* stuff used for matrix vector multiply */ 2914 b->lvec = 0; 2915 b->Mvctx = 0; 2916 2917 /* stuff for MatGetRow() */ 2918 b->rowindices = 0; 2919 b->rowvalues = 0; 2920 b->getrowactive = PETSC_FALSE; 2921 2922 /* hash table stuff */ 2923 b->ht = 0; 2924 b->hd = 0; 2925 b->ht_size = 0; 2926 b->ht_flag = PETSC_FALSE; 2927 b->ht_fact = 0; 2928 b->ht_total_ct = 0; 2929 b->ht_insert_ct = 0; 2930 2931 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2932 b->ijonly = PETSC_FALSE; 2933 2934 2935 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2936 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2937 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2938 #if defined(PETSC_HAVE_HYPRE) 2939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2940 #endif 2941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2942 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2944 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2945 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2947 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 2948 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2949 2950 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2951 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing 
matrix","MatSetOption",&flg);CHKERRQ(ierr);
2952 if (flg) {
2953 PetscReal fact = 1.39;
2954 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2955 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2956 if (fact <= 1.0) fact = 1.39;
2957 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2958 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2959 }
2960 ierr = PetscOptionsEnd();CHKERRQ(ierr);
2961 PetscFunctionReturn(0);
2962 }
2963
2964 /*MC
2965 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2966
2967 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2968 and MATMPIBAIJ otherwise.
2969
2970 Options Database Keys:
2971 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2972
2973 Level: beginner
2974
2975 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2976 M*/
2977
2978 /*@C
2979 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2980 (block compressed row). For good matrix assembly performance
2981 the user should preallocate the matrix storage by setting the parameters
2982 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2983 performance can be increased by more than a factor of 50.
2984
2985 Collective on Mat
2986
2987 Input Parameters:
2988 + B - the matrix
2989 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2990 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2991 . d_nz - number of block nonzeros per block row in diagonal portion of local
2992 submatrix (same for all local rows)
2993 . d_nnz - array containing the number of block nonzeros in the various block rows
2994 in the diagonal portion of the local submatrix (possibly different for each block
2995 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
2996 set it even if it is zero.
2997 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2998 submatrix (same for all local rows).
2999 - o_nnz - array containing the number of block nonzeros in the various block rows of the
3000 off-diagonal portion of the local submatrix (possibly different for
3001 each block row) or NULL.
3002
3003 If the *_nnz parameter is given then the *_nz parameter is ignored
3004
3005 Options Database Keys:
3006 + -mat_block_size - size of the blocks to use
3007 - -mat_use_hash_table <fact>
3008
3009 Notes:
3010 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3011 then it must be used on all processors that share the object for that argument.
3012
3013 Storage Information:
3014 For a square global matrix we define each processor's diagonal portion
3015 to be its local rows and the corresponding columns (a square submatrix);
3016 each processor's off-diagonal portion encompasses the remainder of the
3017 local matrix (a rectangular submatrix).
3018
3019 The user can specify preallocated storage for the diagonal part of
3020 the local submatrix with either d_nz or d_nnz (not both). Set
3021 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3022 memory allocation.
Likewise, specify preallocated storage for the 3023 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3024 3025 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3026 the figure below we depict these three local rows and all columns (0-11). 3027 3028 .vb 3029 0 1 2 3 4 5 6 7 8 9 10 11 3030 -------------------------- 3031 row 3 |o o o d d d o o o o o o 3032 row 4 |o o o d d d o o o o o o 3033 row 5 |o o o d d d o o o o o o 3034 -------------------------- 3035 .ve 3036 3037 Thus, any entries in the d locations are stored in the d (diagonal) 3038 submatrix, and any entries in the o locations are stored in the 3039 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3040 stored simply in the MATSEQBAIJ format for compressed row storage. 3041 3042 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3043 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3044 In general, for PDE problems in which most nonzeros are near the diagonal, 3045 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3046 or you will get TERRIBLE performance; see the users' manual chapter on 3047 matrices. 3048 3049 You can call MatGetInfo() to get information on how effective the preallocation was; 3050 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3051 You can also run with the option -info and look for messages with the string 3052 malloc in them to see if additional memory allocation was needed. 3053 3054 Level: intermediate 3055 3056 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3057 @*/ 3058 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3059 { 3060 PetscErrorCode ierr; 3061 3062 PetscFunctionBegin; 3063 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3064 PetscValidType(B,1); 3065 PetscValidLogicalCollectiveInt(B,bs,2); 3066 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 /*@C 3071 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3072 (block compressed row). For good matrix assembly performance 3073 the user should preallocate the matrix storage by setting the parameters 3074 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3075 performance can be increased by more than a factor of 50. 3076 3077 Collective 3078 3079 Input Parameters: 3080 + comm - MPI communicator 3081 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3082 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3083 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3084 This value should be the same as the local size used in creating the 3085 y vector for the matrix-vector product y = Ax. 3086 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3087 This value should be the same as the local size used in creating the 3088 x vector for the matrix-vector product y = Ax. 3089 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3090 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3091 . d_nz - number of nonzero blocks per block row in the diagonal portion of the local 3092 submatrix (same for all local block rows) 3093 . d_nnz - array containing the number of nonzero blocks in the various block rows 3094 in the diagonal portion of the local submatrix (possibly different for each block 3095 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3096 and set it even if it is zero. 3097 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local 3098 submatrix (same for all local block rows). 3099 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3100 off-diagonal portion of the local submatrix (possibly different for 3101 each block row) or NULL. 3102 3103 Output Parameter: 3104 . A - the matrix 3105 3106 Options Database Keys: 3107 + -mat_block_size - size of the blocks to use 3108 - -mat_use_hash_table <fact> - use a hash table for matrix assembly, with the given fill factor 3109 3110 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3111 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3112 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3113 3114 Notes: 3115 If the *_nnz parameter is given then the *_nz parameter is ignored. 3116 3117 A nonzero block is any block that has 1 or more nonzeros in it. 3118 3119 The user MUST specify either the local or global matrix dimensions 3120 (possibly both). 3121 3122 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3123 then it must be used on all processors that share the object for that argument. 3124 3125 Storage Information: 3126 For a square global matrix we define each processor's diagonal portion 3127 to be its local rows and the corresponding columns (a square submatrix); 3128 each processor's off-diagonal portion encompasses the remainder of the 3129 local matrix (a rectangular submatrix). 3130 3131 The user can specify preallocated storage for the diagonal part of 3132 the local submatrix with either d_nz or d_nnz (not both). Set 3133 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3134 memory allocation. Likewise, specify preallocated storage for the 3135 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3136 3137 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3138 the figure below we depict these three local rows and all columns (0-11). 3139 3140 .vb 3141 0 1 2 3 4 5 6 7 8 9 10 11 3142 -------------------------- 3143 row 3 |o o o d d d o o o o o o 3144 row 4 |o o o d d d o o o o o o 3145 row 5 |o o o d d d o o o o o o 3146 -------------------------- 3147 .ve 3148 3149 Thus, any entries in the d locations are stored in the d (diagonal) 3150 submatrix, and any entries in the o locations are stored in the 3151 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3152 stored simply in the MATSEQBAIJ format for compressed row storage. 3153 3154 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3155 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3156 In general, for PDE problems in which most nonzeros are near the diagonal, 3157 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3158 or you will get TERRIBLE performance; see the users' manual chapter on 3159 matrices. 
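   Example usage (a sketch only; the communicator comm is assumed to exist, and the sizes and
   preallocation counts are arbitrary illustrative values):
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,2,10,10,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);CHKERRQ(ierr);
   /* set entries with MatSetValuesBlocked() or MatSetValues(), then assemble */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve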
3160 3161 Level: intermediate 3162 3163 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3164 @*/ 3165 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3166 { 3167 PetscErrorCode ierr; 3168 PetscMPIInt size; 3169 3170 PetscFunctionBegin; 3171 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3172 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3173 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3174 if (size > 1) { 3175 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3176 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3177 } else { 3178 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3179 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 3184 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3185 { 3186 Mat mat; 3187 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3188 PetscErrorCode ierr; 3189 PetscInt len=0; 3190 3191 PetscFunctionBegin; 3192 *newmat = 0; 3193 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3194 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3195 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3196 3197 mat->factortype = matin->factortype; 3198 mat->preallocated = PETSC_TRUE; 3199 mat->assembled = PETSC_TRUE; 3200 mat->insertmode = NOT_SET_VALUES; 3201 3202 a = (Mat_MPIBAIJ*)mat->data; 3203 mat->rmap->bs = matin->rmap->bs; 3204 a->bs2 = oldmat->bs2; 3205 a->mbs = oldmat->mbs; 3206 a->nbs = oldmat->nbs; 3207 a->Mbs = oldmat->Mbs; 3208 a->Nbs = oldmat->Nbs; 3209 3210 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3211 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3212 3213 a->size = oldmat->size; 3214 a->rank = oldmat->rank; 3215 a->donotstash = oldmat->donotstash; 3216 a->roworiented = oldmat->roworiented; 3217 a->rowindices = 0; 3218 a->rowvalues = 0; 3219 a->getrowactive = PETSC_FALSE; 3220 a->barray = 0; 3221 a->rstartbs = oldmat->rstartbs; 3222 a->rendbs = oldmat->rendbs; 3223 a->cstartbs = oldmat->cstartbs; 3224 a->cendbs = oldmat->cendbs; 3225 3226 /* hash table stuff */ 3227 a->ht = 0; 3228 a->hd = 0; 3229 a->ht_size = 0; 3230 a->ht_flag = oldmat->ht_flag; 3231 a->ht_fact = oldmat->ht_fact; 3232 a->ht_total_ct = 0; 3233 a->ht_insert_ct = 0; 3234 3235 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3236 if (oldmat->colmap) { 3237 #if defined(PETSC_USE_CTABLE) 3238 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3239 #else 3240 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3241 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3242 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3243 #endif 3244 } else a->colmap = 0; 3245 3246 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3247 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3248 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3249 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3250 } else a->garray = 0; 3251 3252 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3253 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3254 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3255 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3257 3258 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3260 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3262 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3263 *newmat = mat; 3264 PetscFunctionReturn(0); 3265 } 3266 3267 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3268 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3269 { 3270 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3271 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3272 PetscScalar *matvals; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3277 3278 /* read in matrix header */ 3279 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3280 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3281 M = header[1]; N = header[2]; nz = header[3]; 3282 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3283 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3284 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3285 3286 /* set block sizes from the viewer's .info file */ 3287 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3288 /* set local sizes if not set already */ 3289 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3290 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3291 /* set global sizes if not set already */ 3292 if (mat->rmap->N < 0) mat->rmap->N = M; 3293 if (mat->cmap->N < 0) mat->cmap->N = N; 3294 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3295 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3296 3297 /* check if the matrix sizes are correct */ 3298 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3299 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3300 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3301 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3302 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr); 3303 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr); 3304 mbs = m/bs; nbs = n/bs; 3305 3306 /* read in row lengths and build row indices */ 3307 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3308 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3309 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3310 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3311 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3312 3313 /* read in column indices and 
matrix values */ 3314 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3315 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3316 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3317 3318 { /* preallocate matrix storage */ 3319 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3320 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3321 PetscBool sbaij,done; 3322 PetscInt *d_nnz,*o_nnz; 3323 3324 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3325 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3326 ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3327 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3328 for (i=0; i<mbs; i++) { 3329 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3330 ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3331 for (k=0; k<bs; k++) { 3332 PetscInt row = bs*i + k; 3333 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3334 PetscInt col = colidxs[j]; 3335 if (!sbaij || col >= row) { 3336 if (col >= cs && col < ce) { 3337 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3338 } else { 3339 ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3340 if (done) o_nnz[i]++; 3341 } 3342 } 3343 } 3344 } 3345 } 3346 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3347 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3348 ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3349 ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3350 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3351 } 3352 3353 /* store matrix values */ 3354 for (i=0; i<m; i++) { 3355 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3356 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3357 } 3358 3359 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3360 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3361 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3362 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3367 { 3368 PetscErrorCode ierr; 3369 PetscBool isbinary; 3370 3371 PetscFunctionBegin; 3372 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3373 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3374 ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3375 PetscFunctionReturn(0); 3376 } 3377 3378 /*@ 3379 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3380 3381 Input Parameters: 3382 + mat - the matrix 3383 - fact - the factor used to compute the hash table size 3384 3385 Not Collective, each process can use a different factor 3386 3387 Level: advanced 3388 3389 Notes: 3390 This can also be set by the command line option: -mat_use_hash_table <fact> 3391 3392 .seealso: MatSetOption() 3393 @*/ 3394 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3395 { 3396 PetscErrorCode ierr; 3397 3398 PetscFunctionBegin; 3399 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 3403 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3404 { 3405 Mat_MPIBAIJ *baij; 3406 3407 PetscFunctionBegin; 3408 baij = (Mat_MPIBAIJ*)mat->data; 3409 baij->ht_fact = fact; 3410 PetscFunctionReturn(0); 3411 } 3412 3413 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3414 { 3415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3416 PetscBool flg; 3417 PetscErrorCode ierr; 3418 3419 PetscFunctionBegin; 3420 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 3421 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3422 if (Ad) *Ad = a->A; 3423 if (Ao) *Ao = a->B; 3424 if (colmap) *colmap = a->garray; 3425 PetscFunctionReturn(0); 3426 } 3427 3428 /* 3429 Special version for direct calls from Fortran (to eliminate two function call overheads) 3430 */ 3431 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3432 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3433 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3434 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3435 #endif 3436 3437 /*@C 3438 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3439 3440 Collective on Mat 3441 3442 Input Parameters: 3443 + mat - the matrix 3444 . min - number of input rows 3445 . im - input rows 3446 . nin - number of input columns 3447 . in - input columns 3448 . v - numerical values input 3449 - addvin - INSERT_VALUES or ADD_VALUES 3450 3451 Notes: 3452 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
3453 3454 Level: advanced 3455 3456 .seealso: MatSetValuesBlocked() 3457 @*/ 3458 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3459 { 3460 /* convert input arguments to C version */ 3461 Mat mat = *matin; 3462 PetscInt m = *min, n = *nin; 3463 InsertMode addv = *addvin; 3464 3465 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3466 const MatScalar *value; 3467 MatScalar *barray = baij->barray; 3468 PetscBool roworiented = baij->roworiented; 3469 PetscErrorCode ierr; 3470 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3471 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3472 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3473 3474 PetscFunctionBegin; 3475 /* tasks normally handled by MatSetValuesBlocked() */ 3476 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3477 else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3478 if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3479 if (mat->assembled) { 3480 mat->was_assembled = PETSC_TRUE; 3481 mat->assembled = PETSC_FALSE; 3482 } 3483 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3484 3485 3486 if (!barray) { 3487 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3488 baij->barray = barray; 3489 } 3490 3491 if (roworiented) stepval = (n-1)*bs; 3492 else stepval = (m-1)*bs; 3493 3494 for (i=0; i<m; i++) { 3495 if (im[i] < 0) continue; 3496 if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3497 if (im[i] >= rstart && im[i] < rend) { 3498 row = im[i] - rstart; 3499 for (j=0; j<n; j++) { 3500 /* If NumCol = 1 then a copy is not required */ 3501 if ((roworiented) && (n == 1)) { 3502 barray = (MatScalar*)v + i*bs2; 3503 } else if ((!roworiented) && (m == 1)) { 3504 barray = (MatScalar*)v + j*bs2; 3505 } else { /* Here a copy is required */ 3506 if (roworiented) { 3507 value = v + i*(stepval+bs)*bs + j*bs; 3508 } else { 3509 value = v + j*(stepval+bs)*bs + i*bs; 3510 } 3511 for (ii=0; ii<bs; ii++,value+=stepval) { 3512 for (jj=0; jj<bs; jj++) { 3513 *barray++ = *value++; 3514 } 3515 } 3516 barray -=bs2; 3517 } 3518 3519 if (in[j] >= cstart && in[j] < cend) { 3520 col = in[j] - cstart; 3521 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3522 } else if (in[j] < 0) continue; 3523 else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3524 else { 3525 if (mat->was_assembled) { 3526 if (!baij->colmap) { 3527 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3528 } 3529 3530 #if defined(PETSC_USE_DEBUG) 3531 #if defined(PETSC_USE_CTABLE) 3532 { PetscInt data; 3533 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3534 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3535 } 3536 #else 3537 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3538 #endif 3539 #endif 3540 #if defined(PETSC_USE_CTABLE) 3541 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3542 col = (col - 1)/bs; 3543 #else 3544 col = (baij->colmap[in[j]] - 1)/bs; 3545 #endif 3546 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3547 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3548 col = in[j]; 3549 } 3550 } else col = in[j]; 3551 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3552 } 3553 } 3554 } else { 3555 if (!baij->donotstash) { 3556 if (roworiented) { 3557 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3558 } else { 3559 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3560 } 3561 } 3562 } 3563 } 3564 3565 /* task normally handled by MatSetValuesBlocked() */ 3566 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3567 PetscFunctionReturn(0); 3568 } 3569 3570 /*@ 3571 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard block 3572 CSR format. 3573 3574 Collective 3575 3576 Input Parameters: 3577 + comm - MPI communicator 3578 . bs - the block size, only a block size of 1 is supported 3579 . m - number of local rows (Cannot be PETSC_DECIDE) 3580 . n - This value should be the same as the local size used in creating the 3581 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3582 calculated if N is given) For square matrices n is almost always m. 3583 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3584 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3585 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + the number of nonzero blocks in block row row-1 of the matrix 3586 . j - column indices 3587 - a - matrix values 3588 3589 Output Parameter: 3590 . mat - the matrix 3591 3592 Level: intermediate 3593 3594 Notes: 3595 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3596 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3597 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3598 3599 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3600 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3601 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3602 with column-major ordering within blocks. 3603 3604 The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays. 
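   As an illustration of the layout described above (the block entries are arbitrary), with a block size of 2 a
   single nonzero block
.vb
      ( x11  x12 )
      ( x21  x22 )
.ve
   contributes the consecutive entries x11, x21, x12, x22 to a[] (first column of the block, then the second column).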
3605 3606 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3607 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3608 @*/ 3609 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3610 { 3611 PetscErrorCode ierr; 3612 3613 PetscFunctionBegin; 3614 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3615 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3616 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3617 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3618 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3619 ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 3620 ierr = MatSetUp(*mat);CHKERRQ(ierr); 3621 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3622 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3623 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3624 PetscFunctionReturn(0); 3625 } 3626 3627 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3628 { 3629 PetscErrorCode ierr; 3630 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3631 PetscInt *indx; 3632 PetscScalar *values; 3633 3634 PetscFunctionBegin; 3635 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3636 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3638 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3639 PetscInt *bindx,rmax=a->rmax,j; 3640 PetscMPIInt rank,size; 3641 3642 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3643 mbs = m/bs; Nbs = N/cbs; 3644 if (n == PETSC_DECIDE) { 3645 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr); 3646 } 3647 nbs = n/cbs; 3648 3649 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3650 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3651 3652 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3654 if (rank == size-1) { 3655 /* Check sum(nbs) = Nbs */ 3656 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3657 } 3658 3659 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3660 for (i=0; i<mbs; i++) { 3661 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3662 nnz = nnz/bs; 3663 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3664 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3665 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3666 } 3667 ierr = PetscFree(bindx);CHKERRQ(ierr); 3668 3669 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3670 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3671 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3672 ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 3673 ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3674 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3675 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3676 } 3677 3678 /* numeric phase */ 3679 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3680 ierr = 
MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3681 3682 for (i=0; i<m; i++) { 3683 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3684 Ii = i + rstart; 3685 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3686 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3687 } 3688 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3690 PetscFunctionReturn(0); 3691 } 3692
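/*
   A minimal usage sketch (not part of the routines above; the file name "matrix.dat" and the use of
   PETSC_COMM_WORLD are illustrative assumptions): loading a MATMPIBAIJ matrix from a PETSc binary
   viewer, which dispatches to MatLoad_MPIBAIJ()/MatLoad_MPIBAIJ_Binary() defined in this file.

.vb
   Mat         A;
   PetscViewer viewer;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatLoad(A,viewer);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
*/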