1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscErrorCode ierr; 15 PetscInt i,*idxb = 0; 16 PetscScalar *va,*vb; 17 Vec vtmp; 18 19 PetscFunctionBegin; 20 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 21 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 22 if (idx) { 23 for (i=0; i<A->rmap->n; i++) { 24 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 25 } 26 } 27 28 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 29 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 30 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 31 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 32 33 for (i=0; i<A->rmap->n; i++) { 34 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 35 va[i] = vb[i]; 36 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 37 } 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 48 { 49 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 59 { 60 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 65 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 /* 70 Local utility routine that creates a mapping from the global column 71 number to the local number in the off-diagonal part of the local 72 storage of the matrix. This is done in a non scalable way since the 73 length of colmap equals the global matrix length. 
74 */ 75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 76 { 77 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 78 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 79 PetscErrorCode ierr; 80 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 81 82 PetscFunctionBegin; 83 #if defined(PETSC_USE_CTABLE) 84 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 85 for (i=0; i<nbs; i++) { 86 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 87 } 88 #else 89 ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 90 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 91 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 92 #endif 93 PetscFunctionReturn(0); 94 } 95 96 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 97 { \ 98 brow = row/bs; \ 99 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 100 rmax = aimax[brow]; nrow = ailen[brow]; \ 101 bcol = col/bs; \ 102 ridx = row % bs; cidx = col % bs; \ 103 low = 0; high = nrow; \ 104 while (high-low > 3) { \ 105 t = (low+high)/2; \ 106 if (rp[t] > bcol) high = t; \ 107 else low = t; \ 108 } \ 109 for (_i=low; _i<high; _i++) { \ 110 if (rp[_i] > bcol) break; \ 111 if (rp[_i] == bcol) { \ 112 bap = ap + bs2*_i + bs*cidx + ridx; \ 113 if (addv == ADD_VALUES) *bap += value; \ 114 else *bap = value; \ 115 goto a_noinsert; \ 116 } \ 117 } \ 118 if (a->nonew == 1) goto a_noinsert; \ 119 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 120 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 121 N = nrow++ - 1; \ 122 /* shift up all the later entries in this row */ \ 123 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 124 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 125 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 a_noinsert:; \ 129 ailen[brow] = nrow; \ 130 } 131 132 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 133 { \ 134 brow = row/bs; \ 135 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 136 rmax = bimax[brow]; nrow = bilen[brow]; \ 137 bcol = col/bs; \ 138 ridx = row % bs; cidx = col % bs; \ 139 low = 0; high = nrow; \ 140 while (high-low > 3) { \ 141 t = (low+high)/2; \ 142 if (rp[t] > bcol) high = t; \ 143 else low = t; \ 144 } \ 145 for (_i=low; _i<high; _i++) { \ 146 if (rp[_i] > bcol) break; \ 147 if (rp[_i] == bcol) { \ 148 bap = ap + bs2*_i + bs*cidx + ridx; \ 149 if (addv == ADD_VALUES) *bap += value; \ 150 else *bap = value; \ 151 goto b_noinsert; \ 152 } \ 153 } \ 154 if (b->nonew == 1) goto b_noinsert; \ 155 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 156 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 160 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 161 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 162 rp[_i] = bcol; \ 163 ap[bs2*_i + bs*cidx + ridx] = value; \ 164 b_noinsert:; \ 165 bilen[brow] = nrow; \ 166 } 167 168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 169 { 170 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 171 MatScalar value; 172 PetscBool roworiented = baij->roworiented; 173 PetscErrorCode ierr; 174 PetscInt i,j,row,col; 175 PetscInt rstart_orig=mat->rmap->rstart; 176 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 177 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 178 179 /* Some Variables required in the macro */ 180 Mat A = baij->A; 181 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 182 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 183 MatScalar *aa =a->a; 184 185 Mat B = baij->B; 186 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 187 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 188 MatScalar *ba =b->a; 189 190 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 191 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 192 MatScalar *ap,*bap; 193 194 PetscFunctionBegin; 195 for (i=0; i<m; i++) { 196 if (im[i] < 0) continue; 197 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 198 if (im[i] >= rstart_orig && im[i] < rend_orig) { 199 row = im[i] - rstart_orig; 200 for (j=0; j<n; j++) { 201 if (in[j] >= cstart_orig && in[j] < cend_orig) { 202 col = in[j] - cstart_orig; 203 if (roworiented) value = v[i*n+j]; 204 else value = v[i+j*m]; 205 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 206 } else if (in[j] < 0) continue; 207 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 208 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 209 } else { 210 if (mat->was_assembled) { 211 if (!baij->colmap) { 212 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 213 } 214 #if defined(PETSC_USE_CTABLE) 215 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 216 col = col - 1; 217 #else 218 col = baij->colmap[in[j]/bs] - 1; 219 #endif 220 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 221 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 222 col = in[j]; 223 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 224 B = baij->B; 225 b = (Mat_SeqBAIJ*)(B)->data; 226 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 227 ba =b->a; 228 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 229 else col += in[j]%bs; 230 } else col = in[j]; 231 if (roworiented) value = v[i*n+j]; 232 else value = v[i+j*m]; 233 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 234 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 235 } 236 } 237 } else { 238 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 239 if (!baij->donotstash) { 240 mat->assembled = PETSC_FALSE; 241 if (roworiented) { 242 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 243 } else { 244 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 245 } 246 } 247 } 248 } 249 PetscFunctionReturn(0); 250 } 251 252 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 253 { 254 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 255 PetscInt 
*rp,low,high,t,ii,jj,nrow,i,rmax,N; 256 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 257 PetscErrorCode ierr; 258 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 259 PetscBool roworiented=a->roworiented; 260 const PetscScalar *value = v; 261 MatScalar *ap,*aa = a->a,*bap; 262 263 PetscFunctionBegin; 264 rp = aj + ai[row]; 265 ap = aa + bs2*ai[row]; 266 rmax = imax[row]; 267 nrow = ailen[row]; 268 value = v; 269 low = 0; 270 high = nrow; 271 while (high-low > 7) { 272 t = (low+high)/2; 273 if (rp[t] > col) high = t; 274 else low = t; 275 } 276 for (i=low; i<high; i++) { 277 if (rp[i] > col) break; 278 if (rp[i] == col) { 279 bap = ap + bs2*i; 280 if (roworiented) { 281 if (is == ADD_VALUES) { 282 for (ii=0; ii<bs; ii++) { 283 for (jj=ii; jj<bs2; jj+=bs) { 284 bap[jj] += *value++; 285 } 286 } 287 } else { 288 for (ii=0; ii<bs; ii++) { 289 for (jj=ii; jj<bs2; jj+=bs) { 290 bap[jj] = *value++; 291 } 292 } 293 } 294 } else { 295 if (is == ADD_VALUES) { 296 for (ii=0; ii<bs; ii++,value+=bs) { 297 for (jj=0; jj<bs; jj++) { 298 bap[jj] += value[jj]; 299 } 300 bap += bs; 301 } 302 } else { 303 for (ii=0; ii<bs; ii++,value+=bs) { 304 for (jj=0; jj<bs; jj++) { 305 bap[jj] = value[jj]; 306 } 307 bap += bs; 308 } 309 } 310 } 311 goto noinsert2; 312 } 313 } 314 if (nonew == 1) goto noinsert2; 315 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 316 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 317 N = nrow++ - 1; high++; 318 /* shift up all the later entries in this row */ 319 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 320 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 321 rp[i] = col; 322 bap = ap + bs2*i; 323 if (roworiented) { 324 for (ii=0; ii<bs; ii++) { 325 for (jj=ii; jj<bs2; jj+=bs) { 326 bap[jj] = *value++; 327 } 328 } 329 } else { 330 for (ii=0; ii<bs; ii++) { 331 for (jj=0; jj<bs; jj++) { 332 *bap++ = *value++; 333 } 334 } 335 } 336 noinsert2:; 337 ailen[row] = nrow; 338 PetscFunctionReturn(0); 339 } 340 341 /* 342 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 343 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 344 */ 345 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 346 { 347 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 348 const PetscScalar *value; 349 MatScalar *barray = baij->barray; 350 PetscBool roworiented = baij->roworiented; 351 PetscErrorCode ierr; 352 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 353 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 354 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 355 356 PetscFunctionBegin; 357 if (!barray) { 358 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 359 baij->barray = barray; 360 } 361 362 if (roworiented) stepval = (n-1)*bs; 363 else stepval = (m-1)*bs; 364 365 for (i=0; i<m; i++) { 366 if (im[i] < 0) continue; 367 if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 368 if (im[i] >= rstart && im[i] < rend) { 369 row = im[i] - rstart; 370 for (j=0; j<n; j++) { 371 /* If NumCol = 1 then a copy is not required */ 372 if ((roworiented) && (n == 1)) { 373 barray = (MatScalar*)v + i*bs2; 374 } else 
if ((!roworiented) && (m == 1)) { 375 barray = (MatScalar*)v + j*bs2; 376 } else { /* Here a copy is required */ 377 if (roworiented) { 378 value = v + (i*(stepval+bs) + j)*bs; 379 } else { 380 value = v + (j*(stepval+bs) + i)*bs; 381 } 382 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 383 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 384 barray += bs; 385 } 386 barray -= bs2; 387 } 388 389 if (in[j] >= cstart && in[j] < cend) { 390 col = in[j] - cstart; 391 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 392 } else if (in[j] < 0) continue; 393 else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) { 394 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 395 } else { 396 if (mat->was_assembled) { 397 if (!baij->colmap) { 398 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 399 } 400 401 #if defined(PETSC_USE_DEBUG) 402 #if defined(PETSC_USE_CTABLE) 403 { PetscInt data; 404 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 405 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 406 } 407 #else 408 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 409 #endif 410 #endif 411 #if defined(PETSC_USE_CTABLE) 412 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 413 col = (col - 1)/bs; 414 #else 415 col = (baij->colmap[in[j]] - 1)/bs; 416 #endif 417 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 418 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 419 col = in[j]; 420 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 421 } else col = in[j]; 422 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 423 } 424 } 425 } else { 426 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 427 if (!baij->donotstash) { 428 if (roworiented) { 429 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 430 } else { 431 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #define HASH_KEY 0.6180339887 440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 441 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 442 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 443 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 444 { 445 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 446 PetscBool roworiented = baij->roworiented; 447 PetscErrorCode ierr; 448 PetscInt i,j,row,col; 449 PetscInt rstart_orig=mat->rmap->rstart; 450 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 451 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 452 PetscReal tmp; 453 MatScalar **HD = baij->hd,value; 454 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 455 456 PetscFunctionBegin; 457 for (i=0; i<m; i++) { 458 if (PetscDefined(USE_DEBUG)) { 459 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 460 if (im[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 461 } 462 row = im[i]; 463 if (row >= rstart_orig && row < rend_orig) { 464 for (j=0; j<n; j++) { 465 col = in[j]; 466 if (roworiented) value = v[i*n+j]; 467 else value = v[i+j*m]; 468 /* Look up PetscInto the Hash Table */ 469 key = (row/bs)*Nbs+(col/bs)+1; 470 h1 = HASH(size,key,tmp); 471 472 473 idx = h1; 474 if (PetscDefined(USE_DEBUG)) { 475 insert_ct++; 476 total_ct++; 477 if (HT[idx] != key) { 478 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 479 if (idx == size) { 480 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 481 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 482 } 483 } 484 } else if (HT[idx] != key) { 485 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 486 if (idx == size) { 487 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 488 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 489 } 490 } 491 /* A HASH table entry is found, so insert the values at the correct address */ 492 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 493 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 494 } 495 } else if (!baij->donotstash) { 496 if (roworiented) { 497 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 498 } else { 499 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 500 } 501 } 502 } 503 if (PetscDefined(USE_DEBUG)) { 504 baij->ht_total_ct += total_ct; 505 baij->ht_insert_ct += insert_ct; 506 } 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 511 { 512 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 513 PetscBool roworiented = baij->roworiented; 514 PetscErrorCode ierr; 515 PetscInt i,j,ii,jj,row,col; 516 PetscInt rstart=baij->rstartbs; 517 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 518 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 519 PetscReal tmp; 520 MatScalar **HD = baij->hd,*baij_a; 521 const PetscScalar *v_t,*value; 522 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 523 524 PetscFunctionBegin; 525 if (roworiented) stepval = (n-1)*bs; 526 else stepval = (m-1)*bs; 527 528 for (i=0; i<m; i++) { 529 if (PetscDefined(USE_DEBUG)) { 530 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 531 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 532 } 533 row = im[i]; 534 v_t = v + i*nbs2; 535 if (row >= rstart && row < rend) { 536 for (j=0; j<n; j++) { 537 col = in[j]; 538 539 /* Look up into the Hash Table */ 540 key = row*Nbs+col+1; 541 h1 = HASH(size,key,tmp); 542 543 idx = h1; 544 if (PetscDefined(USE_DEBUG)) { 545 total_ct++; 546 insert_ct++; 547 if (HT[idx] != key) { 548 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 549 if (idx == size) { 550 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 551 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 552 } 553 } 554 } else if (HT[idx] != key) { 555 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 556 if (idx == size) { 557 for 
(idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 558 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 559 } 560 } 561 baij_a = HD[idx]; 562 if (roworiented) { 563 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 564 /* value = v + (i*(stepval+bs)+j)*bs; */ 565 value = v_t; 566 v_t += bs; 567 if (addv == ADD_VALUES) { 568 for (ii=0; ii<bs; ii++,value+=stepval) { 569 for (jj=ii; jj<bs2; jj+=bs) { 570 baij_a[jj] += *value++; 571 } 572 } 573 } else { 574 for (ii=0; ii<bs; ii++,value+=stepval) { 575 for (jj=ii; jj<bs2; jj+=bs) { 576 baij_a[jj] = *value++; 577 } 578 } 579 } 580 } else { 581 value = v + j*(stepval+bs)*bs + i*bs; 582 if (addv == ADD_VALUES) { 583 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 584 for (jj=0; jj<bs; jj++) { 585 baij_a[jj] += *value++; 586 } 587 } 588 } else { 589 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 590 for (jj=0; jj<bs; jj++) { 591 baij_a[jj] = *value++; 592 } 593 } 594 } 595 } 596 } 597 } else { 598 if (!baij->donotstash) { 599 if (roworiented) { 600 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 601 } else { 602 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 603 } 604 } 605 } 606 } 607 if (PetscDefined(USE_DEBUG)) { 608 baij->ht_total_ct += total_ct; 609 baij->ht_insert_ct += insert_ct; 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 617 PetscErrorCode ierr; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 624 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 629 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } else { 634 if (!baij->colmap) { 635 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 PetscErrorCode ierr; 660 PetscInt 
i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 661 PetscReal sum = 0.0; 662 MatScalar *v; 663 664 PetscFunctionBegin; 665 if (baij->size == 1) { 666 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 667 } else { 668 if (type == NORM_FROBENIUS) { 669 v = amat->a; 670 nz = amat->nz*bs2; 671 for (i=0; i<nz; i++) { 672 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 673 } 674 v = bmat->a; 675 nz = bmat->nz*bs2; 676 for (i=0; i<nz; i++) { 677 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 678 } 679 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 680 *nrm = PetscSqrtReal(*nrm); 681 } else if (type == NORM_1) { /* max column sum */ 682 PetscReal *tmp,*tmp2; 683 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 684 ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 685 ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 686 v = amat->a; jj = amat->j; 687 for (i=0; i<amat->nz; i++) { 688 for (j=0; j<bs; j++) { 689 col = bs*(cstart + *jj) + j; /* column index */ 690 for (row=0; row<bs; row++) { 691 tmp[col] += PetscAbsScalar(*v); v++; 692 } 693 } 694 jj++; 695 } 696 v = bmat->a; jj = bmat->j; 697 for (i=0; i<bmat->nz; i++) { 698 for (j=0; j<bs; j++) { 699 col = bs*garray[*jj] + j; 700 for (row=0; row<bs; row++) { 701 tmp[col] += PetscAbsScalar(*v); v++; 702 } 703 } 704 jj++; 705 } 706 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 707 *nrm = 0.0; 708 for (j=0; j<mat->cmap->N; j++) { 709 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 710 } 711 ierr = PetscFree(tmp);CHKERRQ(ierr); 712 ierr = PetscFree(tmp2);CHKERRQ(ierr); 713 } else if (type == NORM_INFINITY) { /* max row sum */ 714 PetscReal *sums; 715 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 716 sum = 0.0; 717 for (j=0; j<amat->mbs; j++) { 718 for (row=0; row<bs; row++) sums[row] = 0.0; 719 v = amat->a + bs2*amat->i[j]; 720 nz = amat->i[j+1]-amat->i[j]; 721 for (i=0; i<nz; i++) { 722 for (col=0; col<bs; col++) { 723 for (row=0; row<bs; row++) { 724 sums[row] += PetscAbsScalar(*v); v++; 725 } 726 } 727 } 728 v = bmat->a + bs2*bmat->i[j]; 729 nz = bmat->i[j+1]-bmat->i[j]; 730 for (i=0; i<nz; i++) { 731 for (col=0; col<bs; col++) { 732 for (row=0; row<bs; row++) { 733 sums[row] += PetscAbsScalar(*v); v++; 734 } 735 } 736 } 737 for (row=0; row<bs; row++) { 738 if (sums[row] > sum) sum = sums[row]; 739 } 740 } 741 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 ierr = PetscFree(sums);CHKERRQ(ierr); 743 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 Creates the hash table, and sets the table 750 This table is created only once. 751 If new entried need to be added to the matrix 752 then the hash table has to be destroyed and 753 recreated. 
754 */ 755 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 756 { 757 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 758 Mat A = baij->A,B=baij->B; 759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 760 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 761 PetscErrorCode ierr; 762 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 763 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 764 PetscInt *HT,key; 765 MatScalar **HD; 766 PetscReal tmp; 767 #if defined(PETSC_USE_INFO) 768 PetscInt ct=0,max=0; 769 #endif 770 771 PetscFunctionBegin; 772 if (baij->ht) PetscFunctionReturn(0); 773 774 baij->ht_size = (PetscInt)(factor*nz); 775 ht_size = baij->ht_size; 776 777 /* Allocate Memory for Hash Table */ 778 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 779 HD = baij->hd; 780 HT = baij->ht; 781 782 /* Loop Over A */ 783 for (i=0; i<a->mbs; i++) { 784 for (j=ai[i]; j<ai[i+1]; j++) { 785 row = i+rstart; 786 col = aj[j]+cstart; 787 788 key = row*Nbs + col + 1; 789 h1 = HASH(ht_size,key,tmp); 790 for (k=0; k<ht_size; k++) { 791 if (!HT[(h1+k)%ht_size]) { 792 HT[(h1+k)%ht_size] = key; 793 HD[(h1+k)%ht_size] = a->a + j*bs2; 794 break; 795 #if defined(PETSC_USE_INFO) 796 } else { 797 ct++; 798 #endif 799 } 800 } 801 #if defined(PETSC_USE_INFO) 802 if (k> max) max = k; 803 #endif 804 } 805 } 806 /* Loop Over B */ 807 for (i=0; i<b->mbs; i++) { 808 for (j=bi[i]; j<bi[i+1]; j++) { 809 row = i+rstart; 810 col = garray[bj[j]]; 811 key = row*Nbs + col + 1; 812 h1 = HASH(ht_size,key,tmp); 813 for (k=0; k<ht_size; k++) { 814 if (!HT[(h1+k)%ht_size]) { 815 HT[(h1+k)%ht_size] = key; 816 HD[(h1+k)%ht_size] = b->a + j*bs2; 817 break; 818 #if defined(PETSC_USE_INFO) 819 } else { 820 ct++; 821 #endif 822 } 823 } 824 #if defined(PETSC_USE_INFO) 825 if (k> max) max = k; 826 #endif 827 } 828 } 829 830 /* Print Summary */ 831 #if defined(PETSC_USE_INFO) 832 for (i=0,j=0; i<ht_size; i++) { 833 if (HT[i]) j++; 834 } 835 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 836 #endif 837 PetscFunctionReturn(0); 838 } 839 840 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 841 { 842 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 843 PetscErrorCode ierr; 844 PetscInt nstash,reallocs; 845 846 PetscFunctionBegin; 847 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 848 849 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 850 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 851 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 852 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 853 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 854 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 858 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 859 { 860 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 861 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 862 PetscErrorCode ierr; 863 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 864 PetscInt *row,*col; 865 PetscBool r1,r2,r3,other_disassembled; 866 MatScalar *val; 867 PetscMPIInt n; 868 869 PetscFunctionBegin; 870 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 871 if (!baij->donotstash && !mat->nooffprocentries) { 872 while (1) { 873 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 874 if (!flg) break; 875 876 for (i=0; i<n;) { 877 /* Now identify the consecutive vals belonging to the same row */ 878 for (j=i,rstart=row[j]; j<n; j++) { 879 if (row[j] != rstart) break; 880 } 881 if (j < n) ncols = j-i; 882 else ncols = n-i; 883 /* Now assemble all these values with a single function call */ 884 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 885 i = j; 886 } 887 } 888 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 889 /* Now process the block-stash. Since the values are stashed column-oriented, 890 set the roworiented flag to column oriented, and after MatSetValues() 891 restore the original flags */ 892 r1 = baij->roworiented; 893 r2 = a->roworiented; 894 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 895 896 baij->roworiented = PETSC_FALSE; 897 a->roworiented = PETSC_FALSE; 898 899 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 900 while (1) { 901 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 902 if (!flg) break; 903 904 for (i=0; i<n;) { 905 /* Now identify the consecutive vals belonging to the same row */ 906 for (j=i,rstart=row[j]; j<n; j++) { 907 if (row[j] != rstart) break; 908 } 909 if (j < n) ncols = j-i; 910 else ncols = n-i; 911 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 912 i = j; 913 } 914 } 915 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 916 917 baij->roworiented = r1; 918 a->roworiented = r2; 919 920 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 921 } 922 923 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 924 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 925 926 /* determine if any processor has disassembled, if so we must 927 also disassemble ourselfs, in order that we may reassemble. 
*/ 928 /* 929 if nonzero structure of submatrix B cannot change then we know that 930 no processor disassembled thus we can skip this stuff 931 */ 932 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 933 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 934 if (mat->was_assembled && !other_disassembled) { 935 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 936 } 937 } 938 939 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 940 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 941 } 942 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 943 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 944 945 #if defined(PETSC_USE_INFO) 946 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 947 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 948 949 baij->ht_total_ct = 0; 950 baij->ht_insert_ct = 0; 951 } 952 #endif 953 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 954 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 955 956 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 957 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 958 } 959 960 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 961 962 baij->rowvalues = 0; 963 964 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 965 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 966 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 967 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 968 } 969 PetscFunctionReturn(0); 970 } 971 972 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 973 #include <petscdraw.h> 974 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 975 { 976 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 977 PetscErrorCode ierr; 978 PetscMPIInt rank = baij->rank; 979 PetscInt bs = mat->rmap->bs; 980 PetscBool iascii,isdraw; 981 PetscViewer sviewer; 982 PetscViewerFormat format; 983 984 PetscFunctionBegin; 985 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 986 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 987 if (iascii) { 988 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 989 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 990 MatInfo info; 991 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 992 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 993 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n", 995 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 996 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 998 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 999 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1001 ierr = 
PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1002 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1003 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1006 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1009 PetscFunctionReturn(0); 1010 } 1011 } 1012 1013 if (isdraw) { 1014 PetscDraw draw; 1015 PetscBool isnull; 1016 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1017 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1018 if (isnull) PetscFunctionReturn(0); 1019 } 1020 1021 { 1022 /* assemble the entire matrix onto first processor. */ 1023 Mat A; 1024 Mat_SeqBAIJ *Aloc; 1025 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1026 MatScalar *a; 1027 const char *matname; 1028 1029 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1030 /* Perhaps this should be the type of mat? */ 1031 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1032 if (!rank) { 1033 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1034 } else { 1035 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1036 } 1037 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1038 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1039 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1040 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1041 1042 /* copy over the A part */ 1043 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1044 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1045 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1046 1047 for (i=0; i<mbs; i++) { 1048 rvals[0] = bs*(baij->rstartbs + i); 1049 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1050 for (j=ai[i]; j<ai[i+1]; j++) { 1051 col = (baij->cstartbs+aj[j])*bs; 1052 for (k=0; k<bs; k++) { 1053 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1054 col++; a += bs; 1055 } 1056 } 1057 } 1058 /* copy over the B part */ 1059 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1060 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1061 for (i=0; i<mbs; i++) { 1062 rvals[0] = bs*(baij->rstartbs + i); 1063 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1064 for (j=ai[i]; j<ai[i+1]; j++) { 1065 col = baij->garray[aj[j]]*bs; 1066 for (k=0; k<bs; k++) { 1067 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1068 col++; a += bs; 1069 } 1070 } 1071 } 1072 ierr = PetscFree(rvals);CHKERRQ(ierr); 1073 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1075 /* 1076 Everyone has to call to draw the matrix since the graphics waits are 1077 synchronized across all processors that share the PetscDraw object 1078 */ 1079 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1080 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1081 if (!rank) { 1082 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1083 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1084 } 1085 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1086 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1087 ierr = MatDestroy(&A);CHKERRQ(ierr); 
1088 } 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1093 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1094 { 1095 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1096 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1097 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1098 const PetscInt *garray = aij->garray; 1099 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1100 PetscInt *rowlens,*colidxs; 1101 PetscScalar *matvals; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1106 1107 M = mat->rmap->N; 1108 N = mat->cmap->N; 1109 m = mat->rmap->n; 1110 rs = mat->rmap->rstart; 1111 cs = mat->cmap->rstart; 1112 bs = mat->rmap->bs; 1113 nz = bs*bs*(A->nz + B->nz); 1114 1115 /* write matrix header */ 1116 header[0] = MAT_FILE_CLASSID; 1117 header[1] = M; header[2] = N; header[3] = nz; 1118 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1119 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1120 1121 /* fill in and store row lengths */ 1122 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1123 for (cnt=0, i=0; i<A->mbs; i++) 1124 for (j=0; j<bs; j++) 1125 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1126 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1127 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1128 1129 /* fill in and store column indices */ 1130 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1131 for (cnt=0, i=0; i<A->mbs; i++) { 1132 for (k=0; k<bs; k++) { 1133 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1134 if (garray[B->j[jb]] > cs/bs) break; 1135 for (l=0; l<bs; l++) 1136 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1137 } 1138 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1139 for (l=0; l<bs; l++) 1140 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1141 for (; jb<B->i[i+1]; jb++) 1142 for (l=0; l<bs; l++) 1143 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1144 } 1145 } 1146 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1147 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1148 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1149 1150 /* fill in and store nonzero values */ 1151 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1152 for (cnt=0, i=0; i<A->mbs; i++) { 1153 for (k=0; k<bs; k++) { 1154 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1155 if (garray[B->j[jb]] > cs/bs) break; 1156 for (l=0; l<bs; l++) 1157 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1158 } 1159 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1160 for (l=0; l<bs; l++) 1161 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1162 for (; jb<B->i[i+1]; jb++) 1163 for (l=0; l<bs; l++) 1164 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1165 } 1166 } 1167 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1168 ierr = PetscFree(matvals);CHKERRQ(ierr); 1169 1170 /* write block size option to the viewer's .info file */ 1171 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1176 { 1177 PetscErrorCode ierr; 1178 PetscBool iascii,isdraw,issocket,isbinary; 1179 1180 PetscFunctionBegin; 1181 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1182 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1183 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1184 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1185 if (iascii || isdraw || issocket) { 1186 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1187 } else if (isbinary) { 1188 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 1193 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1194 { 1195 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 #if defined(PETSC_USE_LOG) 1200 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1201 #endif 1202 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1203 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1204 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1205 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1206 #if defined(PETSC_USE_CTABLE) 1207 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1208 #else 1209 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1210 #endif 1211 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1212 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1213 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1214 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1215 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1216 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1217 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1218 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1219 1220 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 1231 #endif 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1237 { 1238 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1239 PetscErrorCode ierr; 1240 PetscInt nt; 1241 1242 PetscFunctionBegin; 1243 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1244 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1245 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1246 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1247 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1249 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1270 PetscErrorCode ierr; 1271 1272 PetscFunctionBegin; 1273 /* do nondiagonal part */ 1274 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1275 /* do local part */ 1276 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1277 /* add partial results together */ 1278 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1279 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1284 { 1285 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1286 PetscErrorCode ierr; 1287 1288 PetscFunctionBegin; 1289 /* do nondiagonal part */ 1290 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1291 /* do local part */ 1292 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1293 /* add partial results together */ 1294 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1295 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1296 PetscFunctionReturn(0); 1297 } 1298 1299 /* 1300 This only works correctly for square matrices where the subblock A->A is the 1301 diagonal block 1302 */ 1303 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1304 { 1305 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1310 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1315 { 1316 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1321 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1328 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1329 PetscErrorCode ierr; 1330 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1331 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1332 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1333 1334 PetscFunctionBegin; 1335 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1336 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1337 mat->getrowactive = PETSC_TRUE; 1338 1339 if 
(!mat->rowvalues && (idx || v)) { 1340 /* 1341 allocate enough space to hold information from the longest row. 1342 */ 1343 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1344 PetscInt max = 1,mbs = mat->mbs,tmp; 1345 for (i=0; i<mbs; i++) { 1346 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1347 if (max < tmp) max = tmp; 1348 } 1349 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1350 } 1351 lrow = row - brstart; 1352 1353 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1354 if (!v) {pvA = 0; pvB = 0;} 1355 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1356 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1357 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1358 nztot = nzA + nzB; 1359 1360 cmap = mat->garray; 1361 if (v || idx) { 1362 if (nztot) { 1363 /* Sort by increasing column numbers, assuming A and B already sorted */ 1364 PetscInt imark = -1; 1365 if (v) { 1366 *v = v_p = mat->rowvalues; 1367 for (i=0; i<nzB; i++) { 1368 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1369 else break; 1370 } 1371 imark = i; 1372 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1373 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1374 } 1375 if (idx) { 1376 *idx = idx_p = mat->rowindices; 1377 if (imark > -1) { 1378 for (i=0; i<imark; i++) { 1379 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1380 } 1381 } else { 1382 for (i=0; i<nzB; i++) { 1383 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1384 else break; 1385 } 1386 imark = i; 1387 } 1388 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1389 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1390 } 1391 } else { 1392 if (idx) *idx = 0; 1393 if (v) *v = 0; 1394 } 1395 } 1396 *nz = nztot; 1397 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1398 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1403 { 1404 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1405 1406 PetscFunctionBegin; 1407 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1408 baij->getrowactive = PETSC_FALSE; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1413 { 1414 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1415 PetscErrorCode ierr; 1416 1417 PetscFunctionBegin; 1418 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1419 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1424 { 1425 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1426 Mat A = a->A,B = a->B; 1427 PetscErrorCode ierr; 1428 PetscLogDouble isend[5],irecv[5]; 1429 1430 PetscFunctionBegin; 1431 info->block_size = (PetscReal)matin->rmap->bs; 1432 1433 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1434 1435 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1436 isend[3] = info->memory; isend[4] = info->mallocs; 1437 1438 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1439 1440 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1441 isend[3] += info->memory; isend[4] += info->mallocs; 1442 1443 if (flag == MAT_LOCAL) { 1444 info->nz_used = 
isend[0]; 1445 info->nz_allocated = isend[1]; 1446 info->nz_unneeded = isend[2]; 1447 info->memory = isend[3]; 1448 info->mallocs = isend[4]; 1449 } else if (flag == MAT_GLOBAL_MAX) { 1450 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1451 1452 info->nz_used = irecv[0]; 1453 info->nz_allocated = irecv[1]; 1454 info->nz_unneeded = irecv[2]; 1455 info->memory = irecv[3]; 1456 info->mallocs = irecv[4]; 1457 } else if (flag == MAT_GLOBAL_SUM) { 1458 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1459 1460 info->nz_used = irecv[0]; 1461 info->nz_allocated = irecv[1]; 1462 info->nz_unneeded = irecv[2]; 1463 info->memory = irecv[3]; 1464 info->mallocs = irecv[4]; 1465 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1466 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1467 info->fill_ratio_needed = 0; 1468 info->factor_mallocs = 0; 1469 PetscFunctionReturn(0); 1470 } 1471 1472 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1473 { 1474 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 switch (op) { 1479 case MAT_NEW_NONZERO_LOCATIONS: 1480 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1481 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1482 case MAT_KEEP_NONZERO_PATTERN: 1483 case MAT_NEW_NONZERO_LOCATION_ERR: 1484 MatCheckPreallocated(A,1); 1485 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1486 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1487 break; 1488 case MAT_ROW_ORIENTED: 1489 MatCheckPreallocated(A,1); 1490 a->roworiented = flg; 1491 1492 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1493 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1494 break; 1495 case MAT_NEW_DIAGONALS: 1496 case MAT_SORTED_FULL: 1497 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1498 break; 1499 case MAT_IGNORE_OFF_PROC_ENTRIES: 1500 a->donotstash = flg; 1501 break; 1502 case MAT_USE_HASH_TABLE: 1503 a->ht_flag = flg; 1504 a->ht_fact = 1.39; 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SUBMAT_SINGLEIS: 1510 case MAT_SYMMETRY_ETERNAL: 1511 MatCheckPreallocated(A,1); 1512 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1513 break; 1514 default: 1515 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1521 { 1522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1523 Mat_SeqBAIJ *Aloc; 1524 Mat B; 1525 PetscErrorCode ierr; 1526 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1527 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1528 MatScalar *a; 1529 1530 PetscFunctionBegin; 1531 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1532 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1533 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1534 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1535 /* Do not know preallocation information, but must set block size */ 1536 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1537 } else { 1538 B = *matout; 1539 } 1540 1541 /* copy over the A part */ 1542 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1543 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1544 ierr = 
PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1545 1546 for (i=0; i<mbs; i++) { 1547 rvals[0] = bs*(baij->rstartbs + i); 1548 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1549 for (j=ai[i]; j<ai[i+1]; j++) { 1550 col = (baij->cstartbs+aj[j])*bs; 1551 for (k=0; k<bs; k++) { 1552 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1553 1554 col++; a += bs; 1555 } 1556 } 1557 } 1558 /* copy over the B part */ 1559 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1560 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1561 for (i=0; i<mbs; i++) { 1562 rvals[0] = bs*(baij->rstartbs + i); 1563 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1564 for (j=ai[i]; j<ai[i+1]; j++) { 1565 col = baij->garray[aj[j]]*bs; 1566 for (k=0; k<bs; k++) { 1567 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1568 col++; 1569 a += bs; 1570 } 1571 } 1572 } 1573 ierr = PetscFree(rvals);CHKERRQ(ierr); 1574 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1575 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 1577 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1578 else { 1579 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1585 { 1586 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1587 Mat a = baij->A,b = baij->B; 1588 PetscErrorCode ierr; 1589 PetscInt s1,s2,s3; 1590 1591 PetscFunctionBegin; 1592 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1593 if (rr) { 1594 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1595 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1596 /* Overlap communication with computation. */ 1597 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 } 1599 if (ll) { 1600 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1601 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1602 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1603 } 1604 /* scale the diagonal block */ 1605 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1606 1607 if (rr) { 1608 /* Do a scatter end and then right scale the off-diagonal block */ 1609 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1616 { 1617 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1618 PetscInt *lrows; 1619 PetscInt r, len; 1620 PetscBool cong; 1621 PetscErrorCode ierr; 1622 1623 PetscFunctionBegin; 1624 /* get locally owned rows */ 1625 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1626 /* fix right hand side if needed */ 1627 if (x && b) { 1628 const PetscScalar *xx; 1629 PetscScalar *bb; 1630 1631 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1632 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1633 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1634 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1635 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1636 } 1637 1638 /* actually zap the local rows */ 1639 /* 1640 Zero the required rows. 
If the "diagonal block" of the matrix 1641 is square and the user wishes to set the diagonal we use separate 1642 code so that MatSetValues() is not called for each diagonal allocating 1643 new memory, thus calling lots of mallocs and slowing things down. 1644 1645 */ 1646 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1647 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1648 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1649 if ((diag != 0.0) && cong) { 1650 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1651 } else if (diag != 0.0) { 1652 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1653 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1654 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1655 for (r = 0; r < len; ++r) { 1656 const PetscInt row = lrows[r] + A->rmap->rstart; 1657 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1658 } 1659 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 } else { 1662 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1663 } 1664 ierr = PetscFree(lrows);CHKERRQ(ierr); 1665 1666 /* only change matrix nonzero state if pattern was allowed to be changed */ 1667 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1668 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1669 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1675 { 1676 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1677 PetscErrorCode ierr; 1678 PetscMPIInt n = A->rmap->n,p = 0; 1679 PetscInt i,j,k,r,len = 0,row,col,count; 1680 PetscInt *lrows,*owners = A->rmap->range; 1681 PetscSFNode *rrows; 1682 PetscSF sf; 1683 const PetscScalar *xx; 1684 PetscScalar *bb,*mask; 1685 Vec xmask,lmask; 1686 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1687 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1688 PetscScalar *aa; 1689 1690 PetscFunctionBegin; 1691 /* Create SF where leaves are input rows and roots are owned rows */ 1692 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1693 for (r = 0; r < n; ++r) lrows[r] = -1; 1694 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1695 for (r = 0; r < N; ++r) { 1696 const PetscInt idx = rows[r]; 1697 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1698 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1699 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1700 } 1701 rrows[r].rank = p; 1702 rrows[r].index = rows[r] - owners[p]; 1703 } 1704 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1705 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1706 /* Collect flags for rows to be zeroed */ 1707 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1708 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1709 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1710 /* Compress 
and put in row numbers */ 1711 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1712 /* zero diagonal part of matrix */ 1713 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1714 /* handle off diagonal part of matrix */ 1715 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1716 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1717 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1718 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1719 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1720 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1721 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1723 if (x) { 1724 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1727 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1728 } 1729 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1730 /* remove zeroed rows of off diagonal matrix */ 1731 for (i = 0; i < len; ++i) { 1732 row = lrows[i]; 1733 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1734 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1735 for (k = 0; k < count; ++k) { 1736 aa[0] = 0.0; 1737 aa += bs; 1738 } 1739 } 1740 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1741 for (i = 0; i < l->B->rmap->N; ++i) { 1742 row = i/bs; 1743 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1744 for (k = 0; k < bs; ++k) { 1745 col = bs*baij->j[j] + k; 1746 if (PetscAbsScalar(mask[col])) { 1747 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1748 if (x) bb[i] -= aa[0]*xx[col]; 1749 aa[0] = 0.0; 1750 } 1751 } 1752 } 1753 } 1754 if (x) { 1755 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1756 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1757 } 1758 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1759 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1760 ierr = PetscFree(lrows);CHKERRQ(ierr); 1761 1762 /* only change matrix nonzero state if pattern was allowed to be changed */ 1763 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1764 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1765 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1766 } 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1771 { 1772 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1773 PetscErrorCode ierr; 1774 1775 PetscFunctionBegin; 1776 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1781 1782 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1783 { 1784 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1785 Mat a,b,c,d; 1786 PetscBool flg; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 a = matA->A; b = matA->B; 1791 c = matB->A; d = matB->B; 1792 1793 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1794 if (flg) { 1795 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1796 } 1797 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1802 { 1803 PetscErrorCode ierr; 1804 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1805 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1806 1807 PetscFunctionBegin; 1808 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1809 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1810 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1811 } else { 1812 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1813 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1814 } 1815 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1820 { 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1829 { 1830 PetscErrorCode ierr; 1831 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1832 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1833 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1834 1835 PetscFunctionBegin; 1836 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1841 { 1842 PetscErrorCode ierr; 1843 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1844 PetscBLASInt bnz,one=1; 1845 Mat_SeqBAIJ *x,*y; 1846 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1847 1848 PetscFunctionBegin; 1849 if (str == SAME_NONZERO_PATTERN) { 1850 PetscScalar alpha = a; 1851 x = (Mat_SeqBAIJ*)xx->A->data; 1852 y = (Mat_SeqBAIJ*)yy->A->data; 1853 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1854 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1855 x = (Mat_SeqBAIJ*)xx->B->data; 1856 y = (Mat_SeqBAIJ*)yy->B->data; 1857 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1858 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1859 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1860 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1861 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1862 } else { 1863 Mat B; 1864 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1865 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1866 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1867 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1868 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1869 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1870 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1871 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1872 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1873 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1874 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1875 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1876 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1877 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1878 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1879 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1880 } 1881 PetscFunctionReturn(0); 1882 } 1883 1884 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1885 { 1886 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1891 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1896 { 1897 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1902 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1907 { 1908 PetscErrorCode ierr; 1909 IS iscol_local; 1910 PetscInt csize; 1911 1912 PetscFunctionBegin; 1913 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1914 if (call == MAT_REUSE_MATRIX) { 1915 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1916 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1917 } else { 1918 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1919 } 1920 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1921 if (call == MAT_INITIAL_MATRIX) { 1922 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1923 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 /* 1929 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1930 in local and then by concatenating the local matrices the end result. 1931 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1932 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1933 */ 1934 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1935 { 1936 PetscErrorCode ierr; 1937 PetscMPIInt rank,size; 1938 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1939 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1940 Mat M,Mreuse; 1941 MatScalar *vwork,*aa; 1942 MPI_Comm comm; 1943 IS isrow_new, iscol_new; 1944 Mat_SeqBAIJ *aij; 1945 1946 PetscFunctionBegin; 1947 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1948 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1949 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1950 /* The compression and expansion should be avoided. 
     It does not point out errors and might change the indices, hence it is buggy. */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValuesBlocked().
2029 */ 2030 M->was_assembled = PETSC_TRUE; 2031 M->assembled = PETSC_FALSE; 2032 } 2033 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2034 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2035 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2036 ii = aij->i; 2037 jj = aij->j; 2038 aa = aij->a; 2039 for (i=0; i<m; i++) { 2040 row = rstart/bs + i; 2041 nz = ii[i+1] - ii[i]; 2042 cwork = jj; jj += nz; 2043 vwork = aa; aa += nz*bs*bs; 2044 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2045 } 2046 2047 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2049 *newmat = M; 2050 2051 /* save submatrix used in processor for next request */ 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2054 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2060 { 2061 MPI_Comm comm,pcomm; 2062 PetscInt clocal_size,nrows; 2063 const PetscInt *rows; 2064 PetscMPIInt size; 2065 IS crowp,lcolp; 2066 PetscErrorCode ierr; 2067 2068 PetscFunctionBegin; 2069 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2070 /* make a collective version of 'rowp' */ 2071 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2072 if (pcomm==comm) { 2073 crowp = rowp; 2074 } else { 2075 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2076 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2077 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2078 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2079 } 2080 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2081 /* make a local version of 'colp' */ 2082 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2083 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2084 if (size==1) { 2085 lcolp = colp; 2086 } else { 2087 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2088 } 2089 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2090 /* now we just get the submatrix */ 2091 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2092 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2093 /* clean up */ 2094 if (pcomm!=comm) { 2095 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2096 } 2097 if (size>1) { 2098 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2099 } 2100 PetscFunctionReturn(0); 2101 } 2102 2103 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2104 { 2105 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2106 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2107 2108 PetscFunctionBegin; 2109 if (nghosts) *nghosts = B->nbs; 2110 if (ghosts) *ghosts = baij->garray; 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2115 { 2116 Mat B; 2117 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2118 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2119 Mat_SeqAIJ *b; 2120 PetscErrorCode ierr; 2121 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2122 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2123 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2124 2125 PetscFunctionBegin; 2126 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2127 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2128 2129 /* ---------------------------------------------------------------- 2130 Tell every processor the number of nonzeros per row 2131 */ 2132 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2133 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2134 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2135 } 2136 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2137 displs = recvcounts + size; 2138 for (i=0; i<size; i++) { 2139 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2140 displs[i] = A->rmap->range[i]/bs; 2141 } 2142 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2143 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 #else 2145 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2146 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2147 #endif 2148 /* --------------------------------------------------------------- 2149 Create the sequential matrix of the same type as the local block diagonal 2150 */ 2151 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2152 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2153 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2154 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2155 b = (Mat_SeqAIJ*)B->data; 2156 2157 /*-------------------------------------------------------------------- 2158 Copy my part of matrix column indices over 2159 */ 2160 sendcount = ad->nz + bd->nz; 2161 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2162 a_jsendbuf = ad->j; 2163 b_jsendbuf = bd->j; 2164 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2165 cnt = 0; 2166 for (i=0; i<n; i++) { 2167 2168 /* put in lower diagonal portion */ 2169 m = bd->i[i+1] - bd->i[i]; 2170 while (m > 0) { 2171 /* is it above diagonal (in bd (compressed) numbering) */ 2172 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2173 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2174 m--; 2175 } 2176 2177 /* put in diagonal portion */ 2178 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2179 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2180 } 2181 2182 /* put in upper diagonal portion */ 2183 while (m-- > 0) { 2184 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2185 } 2186 } 2187 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2188 2189 /*-------------------------------------------------------------------- 2190 Gather all column indices to all processors 2191 */ 2192 for (i=0; i<size; i++) { 2193 recvcounts[i] = 0; 2194 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2195 recvcounts[i] += lens[j]; 2196 } 2197 } 2198 displs[0] = 0; 2199 for (i=1; i<size; i++) { 2200 displs[i] = displs[i-1] + recvcounts[i-1]; 2201 } 2202 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2203 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2204 #else 2205 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2206 #endif 2207 /*-------------------------------------------------------------------- 2208 Assemble the matrix into useable form (note numerical values not yet set) 2209 */ 
2210 /* set the b->ilen (length of each row) values */ 2211 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2212 /* set the b->i indices */ 2213 b->i[0] = 0; 2214 for (i=1; i<=A->rmap->N/bs; i++) { 2215 b->i[i] = b->i[i-1] + lens[i-1]; 2216 } 2217 ierr = PetscFree(lens);CHKERRQ(ierr); 2218 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2219 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2220 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2221 2222 if (A->symmetric) { 2223 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2224 } else if (A->hermitian) { 2225 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2226 } else if (A->structurally_symmetric) { 2227 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2228 } 2229 *newmat = B; 2230 PetscFunctionReturn(0); 2231 } 2232 2233 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2234 { 2235 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2236 PetscErrorCode ierr; 2237 Vec bb1 = 0; 2238 2239 PetscFunctionBegin; 2240 if (flag == SOR_APPLY_UPPER) { 2241 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2246 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2247 } 2248 2249 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2250 if (flag & SOR_ZERO_INITIAL_GUESS) { 2251 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2252 its--; 2253 } 2254 2255 while (its--) { 2256 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2257 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2258 2259 /* update rhs: bb1 = bb - B*x */ 2260 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2261 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2262 2263 /* local sweep */ 2264 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2265 } 2266 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2267 if (flag & SOR_ZERO_INITIAL_GUESS) { 2268 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2269 its--; 2270 } 2271 while (its--) { 2272 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2273 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2274 2275 /* update rhs: bb1 = bb - B*x */ 2276 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2277 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2278 2279 /* local sweep */ 2280 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2281 } 2282 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2283 if (flag & SOR_ZERO_INITIAL_GUESS) { 2284 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2285 its--; 2286 } 2287 while (its--) { 2288 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2289 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2290 2291 /* update rhs: bb1 = bb - B*x */ 2292 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2293 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2294 2295 /* local sweep */ 2296 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2297 } 2298 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2299 2300 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2305 { 2306 PetscErrorCode ierr; 2307 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2308 PetscInt N,i,*garray = aij->garray; 2309 PetscInt ib,jb,bs = A->rmap->bs; 2310 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2311 MatScalar *a_val = a_aij->a; 2312 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2313 MatScalar *b_val = b_aij->a; 2314 PetscReal *work; 2315 2316 PetscFunctionBegin; 2317 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2318 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2319 if (type == NORM_2) { 2320 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2321 for (jb=0; jb<bs; jb++) { 2322 for (ib=0; ib<bs; ib++) { 2323 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2324 a_val++; 2325 } 2326 } 2327 } 2328 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2329 for (jb=0; jb<bs; jb++) { 2330 for (ib=0; ib<bs; ib++) { 2331 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2332 b_val++; 2333 } 2334 } 2335 } 2336 } else if (type == NORM_1) { 2337 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2338 for (jb=0; jb<bs; jb++) { 2339 for (ib=0; ib<bs; ib++) { 2340 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2341 a_val++; 2342 } 2343 } 2344 } 2345 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2346 for (jb=0; jb<bs; jb++) { 2347 for (ib=0; ib<bs; ib++) { 2348 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2349 b_val++; 2350 } 2351 } 2352 } 2353 } else if (type == NORM_INFINITY) { 2354 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2355 for (jb=0; jb<bs; jb++) { 2356 for (ib=0; ib<bs; ib++) { 2357 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2358 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2359 a_val++; 2360 } 2361 } 2362 } 2363 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2364 for (jb=0; jb<bs; jb++) { 2365 for (ib=0; ib<bs; ib++) { 2366 int col = garray[b_aij->j[i]] * bs + jb; 2367 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2368 b_val++; 2369 } 2370 } 2371 } 2372 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2373 if (type == NORM_INFINITY) { 2374 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2375 } else { 2376 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2377 } 2378 ierr = PetscFree(work);CHKERRQ(ierr); 2379 if (type == NORM_2) { 2380 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2386 { 2387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2392 A->factorerrortype = a->A->factorerrortype; 2393 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2394 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatShift_MPIBAIJ(Mat 
Y,PetscScalar a) 2399 { 2400 PetscErrorCode ierr; 2401 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2402 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2403 2404 PetscFunctionBegin; 2405 if (!Y->preallocated) { 2406 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2407 } else if (!aij->nz) { 2408 PetscInt nonew = aij->nonew; 2409 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2410 aij->nonew = nonew; 2411 } 2412 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2417 { 2418 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2423 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2424 if (d) { 2425 PetscInt rstart; 2426 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2427 *d += rstart/A->rmap->bs; 2428 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2434 { 2435 PetscFunctionBegin; 2436 *a = ((Mat_MPIBAIJ*)A->data)->A; 2437 PetscFunctionReturn(0); 2438 } 2439 2440 /* -------------------------------------------------------------------*/ 2441 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2442 MatGetRow_MPIBAIJ, 2443 MatRestoreRow_MPIBAIJ, 2444 MatMult_MPIBAIJ, 2445 /* 4*/ MatMultAdd_MPIBAIJ, 2446 MatMultTranspose_MPIBAIJ, 2447 MatMultTransposeAdd_MPIBAIJ, 2448 0, 2449 0, 2450 0, 2451 /*10*/ 0, 2452 0, 2453 0, 2454 MatSOR_MPIBAIJ, 2455 MatTranspose_MPIBAIJ, 2456 /*15*/ MatGetInfo_MPIBAIJ, 2457 MatEqual_MPIBAIJ, 2458 MatGetDiagonal_MPIBAIJ, 2459 MatDiagonalScale_MPIBAIJ, 2460 MatNorm_MPIBAIJ, 2461 /*20*/ MatAssemblyBegin_MPIBAIJ, 2462 MatAssemblyEnd_MPIBAIJ, 2463 MatSetOption_MPIBAIJ, 2464 MatZeroEntries_MPIBAIJ, 2465 /*24*/ MatZeroRows_MPIBAIJ, 2466 0, 2467 0, 2468 0, 2469 0, 2470 /*29*/ MatSetUp_MPIBAIJ, 2471 0, 2472 0, 2473 MatGetDiagonalBlock_MPIBAIJ, 2474 0, 2475 /*34*/ MatDuplicate_MPIBAIJ, 2476 0, 2477 0, 2478 0, 2479 0, 2480 /*39*/ MatAXPY_MPIBAIJ, 2481 MatCreateSubMatrices_MPIBAIJ, 2482 MatIncreaseOverlap_MPIBAIJ, 2483 MatGetValues_MPIBAIJ, 2484 MatCopy_MPIBAIJ, 2485 /*44*/ 0, 2486 MatScale_MPIBAIJ, 2487 MatShift_MPIBAIJ, 2488 0, 2489 MatZeroRowsColumns_MPIBAIJ, 2490 /*49*/ 0, 2491 0, 2492 0, 2493 0, 2494 0, 2495 /*54*/ MatFDColoringCreate_MPIXAIJ, 2496 0, 2497 MatSetUnfactored_MPIBAIJ, 2498 MatPermute_MPIBAIJ, 2499 MatSetValuesBlocked_MPIBAIJ, 2500 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2501 MatDestroy_MPIBAIJ, 2502 MatView_MPIBAIJ, 2503 0, 2504 0, 2505 /*64*/ 0, 2506 0, 2507 0, 2508 0, 2509 0, 2510 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2511 0, 2512 0, 2513 0, 2514 0, 2515 /*74*/ 0, 2516 MatFDColoringApply_BAIJ, 2517 0, 2518 0, 2519 0, 2520 /*79*/ 0, 2521 0, 2522 0, 2523 0, 2524 MatLoad_MPIBAIJ, 2525 /*84*/ 0, 2526 0, 2527 0, 2528 0, 2529 0, 2530 /*89*/ 0, 2531 0, 2532 0, 2533 0, 2534 0, 2535 /*94*/ 0, 2536 0, 2537 0, 2538 0, 2539 0, 2540 /*99*/ 0, 2541 0, 2542 0, 2543 0, 2544 0, 2545 /*104*/0, 2546 MatRealPart_MPIBAIJ, 2547 MatImaginaryPart_MPIBAIJ, 2548 0, 2549 0, 2550 /*109*/0, 2551 0, 2552 0, 2553 0, 2554 MatMissingDiagonal_MPIBAIJ, 2555 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2556 0, 2557 MatGetGhosts_MPIBAIJ, 2558 0, 2559 0, 2560 /*119*/0, 2561 0, 2562 0, 2563 0, 2564 MatGetMultiProcBlock_MPIBAIJ, 2565 /*124*/0, 2566 MatGetColumnNorms_MPIBAIJ, 2567 
MatInvertBlockDiagonal_MPIBAIJ, 2568 0, 2569 0, 2570 /*129*/ 0, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*134*/ 0, 2576 0, 2577 0, 2578 0, 2579 0, 2580 /*139*/ MatSetBlockSizes_Default, 2581 0, 2582 0, 2583 MatFDColoringSetUp_MPIXAIJ, 2584 0, 2585 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2586 }; 2587 2588 2589 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2590 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2591 2592 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2593 { 2594 PetscInt m,rstart,cstart,cend; 2595 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2596 const PetscInt *JJ =0; 2597 PetscScalar *values=0; 2598 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2599 PetscErrorCode ierr; 2600 PetscBool nooffprocentries; 2601 2602 PetscFunctionBegin; 2603 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2604 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2605 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2606 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2607 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2608 m = B->rmap->n/bs; 2609 rstart = B->rmap->rstart/bs; 2610 cstart = B->cmap->rstart/bs; 2611 cend = B->cmap->rend/bs; 2612 2613 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2614 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2615 for (i=0; i<m; i++) { 2616 nz = ii[i+1] - ii[i]; 2617 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2618 nz_max = PetscMax(nz_max,nz); 2619 dlen = 0; 2620 olen = 0; 2621 JJ = jj + ii[i]; 2622 for (j=0; j<nz; j++) { 2623 if (*JJ < cstart || *JJ >= cend) olen++; 2624 else dlen++; 2625 JJ++; 2626 } 2627 d_nnz[i] = dlen; 2628 o_nnz[i] = olen; 2629 } 2630 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2631 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2632 2633 values = (PetscScalar*)V; 2634 if (!values) { 2635 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2636 } 2637 for (i=0; i<m; i++) { 2638 PetscInt row = i + rstart; 2639 PetscInt ncols = ii[i+1] - ii[i]; 2640 const PetscInt *icols = jj + ii[i]; 2641 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2642 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2643 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2644 } else { /* block ordering does not match so we can only insert one block at a time. */ 2645 PetscInt j; 2646 for (j=0; j<ncols; j++) { 2647 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2648 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2649 } 2650 } 2651 } 2652 2653 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2654 nooffprocentries = B->nooffprocentries; 2655 B->nooffprocentries = PETSC_TRUE; 2656 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2657 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 B->nooffprocentries = nooffprocentries; 2659 2660 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 /*@C 2665 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2666 2667 Collective 2668 2669 Input Parameters: 2670 + B - the matrix 2671 . bs - the block size 2672 . i - the indices into j for the start of each local row (starts with zero) 2673 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2674 - v - optional values in the matrix 2675 2676 Level: advanced 2677 2678 Notes: 2679 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2680 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2681 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2682 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2683 block column and the second index is over columns within a block. 2684 2685 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2686 2687 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2688 @*/ 2689 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2690 { 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2695 PetscValidType(B,1); 2696 PetscValidLogicalCollectiveInt(B,bs,2); 2697 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2702 { 2703 Mat_MPIBAIJ *b; 2704 PetscErrorCode ierr; 2705 PetscInt i; 2706 PetscMPIInt size; 2707 2708 PetscFunctionBegin; 2709 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2710 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2712 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2713 2714 if (d_nnz) { 2715 for (i=0; i<B->rmap->n/bs; i++) { 2716 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2717 } 2718 } 2719 if (o_nnz) { 2720 for (i=0; i<B->rmap->n/bs; i++) { 2721 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2722 } 2723 } 2724 2725 b = (Mat_MPIBAIJ*)B->data; 2726 b->bs2 = bs*bs; 2727 b->mbs = B->rmap->n/bs; 2728 b->nbs = B->cmap->n/bs; 2729 
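  /* global block counts and block ownership ranges below are the matrix row/column layouts scaled by the block size */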
b->Mbs = B->rmap->N/bs; 2730 b->Nbs = B->cmap->N/bs; 2731 2732 for (i=0; i<=b->size; i++) { 2733 b->rangebs[i] = B->rmap->range[i]/bs; 2734 } 2735 b->rstartbs = B->rmap->rstart/bs; 2736 b->rendbs = B->rmap->rend/bs; 2737 b->cstartbs = B->cmap->rstart/bs; 2738 b->cendbs = B->cmap->rend/bs; 2739 2740 #if defined(PETSC_USE_CTABLE) 2741 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2742 #else 2743 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2744 #endif 2745 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2746 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2747 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2748 2749 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2750 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2751 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2752 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2753 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2754 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2755 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2756 2757 if (!B->preallocated) { 2758 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2759 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2760 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2761 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2762 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2763 } 2764 2765 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2766 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2767 B->preallocated = PETSC_TRUE; 2768 B->was_assembled = PETSC_FALSE; 2769 B->assembled = PETSC_FALSE; 2770 PetscFunctionReturn(0); 2771 } 2772 2773 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2774 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2775 2776 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2777 { 2778 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2779 PetscErrorCode ierr; 2780 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2781 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2782 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2783 2784 PetscFunctionBegin; 2785 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2786 ii[0] = 0; 2787 for (i=0; i<M; i++) { 2788 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2789 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2790 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2791 /* remove one from count of matrix has diagonal */ 2792 for (j=id[i]; j<id[i+1]; j++) { 2793 if (jd[j] == i) {ii[i+1]--;break;} 2794 } 2795 } 2796 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2797 cnt = 0; 2798 for (i=0; i<M; i++) { 2799 for (j=io[i]; j<io[i+1]; j++) { 2800 if (garray[jo[j]] > rstart) break; 2801 jj[cnt++] = garray[jo[j]]; 2802 } 2803 for (k=id[i]; k<id[i+1]; k++) { 2804 if (jd[k] != i) { 2805 jj[cnt++] = rstart + jd[k]; 2806 } 2807 } 2808 for (; j<io[i+1]; j++) { 2809 jj[cnt++] = garray[jo[j]]; 2810 } 2811 } 2812 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2813 
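  /* Note that ii and jj are not freed here: MatCreateMPIAdj() takes ownership of the index
     arrays and the MPIAdj matrix frees them when it is destroyed */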
PetscFunctionReturn(0); 2814 } 2815 2816 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2817 2818 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2819 2820 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2821 { 2822 PetscErrorCode ierr; 2823 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2824 Mat B; 2825 Mat_MPIAIJ *b; 2826 2827 PetscFunctionBegin; 2828 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2829 2830 if (reuse == MAT_REUSE_MATRIX) { 2831 B = *newmat; 2832 } else { 2833 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2834 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2835 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2836 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2837 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2838 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2839 } 2840 b = (Mat_MPIAIJ*) B->data; 2841 2842 if (reuse == MAT_REUSE_MATRIX) { 2843 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2844 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2845 } else { 2846 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2847 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2848 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2849 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2850 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2851 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2852 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 } 2854 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 2857 if (reuse == MAT_INPLACE_MATRIX) { 2858 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2859 } else { 2860 *newmat = B; 2861 } 2862 PetscFunctionReturn(0); 2863 } 2864 2865 /*MC 2866 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2867 2868 Options Database Keys: 2869 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2870 . -mat_block_size <bs> - set the blocksize used to store the matrix 2871 - -mat_use_hash_table <fact> 2872 2873 Level: beginner 2874 2875 Notes: 2876 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 2877 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2878 2879 .seealso: MatCreateBAIJ 2880 M*/ 2881 2882 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2883 2884 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2885 { 2886 Mat_MPIBAIJ *b; 2887 PetscErrorCode ierr; 2888 PetscBool flg = PETSC_FALSE; 2889 2890 PetscFunctionBegin; 2891 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2892 B->data = (void*)b; 2893 2894 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2895 B->assembled = PETSC_FALSE; 2896 2897 B->insertmode = NOT_SET_VALUES; 2898 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2899 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2900 2901 /* build local table of row and column ownerships */ 2902 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2903 2904 /* build cache for off array entries formed */ 2905 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2906 2907 b->donotstash = PETSC_FALSE; 2908 b->colmap = NULL; 2909 b->garray = NULL; 2910 b->roworiented = PETSC_TRUE; 2911 2912 /* stuff used in block assembly */ 2913 b->barray = 0; 2914 2915 /* stuff used for matrix vector multiply */ 2916 b->lvec = 0; 2917 b->Mvctx = 0; 2918 2919 /* stuff for MatGetRow() */ 2920 b->rowindices = 0; 2921 b->rowvalues = 0; 2922 b->getrowactive = PETSC_FALSE; 2923 2924 /* hash table stuff */ 2925 b->ht = 0; 2926 b->hd = 0; 2927 b->ht_size = 0; 2928 b->ht_flag = PETSC_FALSE; 2929 b->ht_fact = 0; 2930 b->ht_total_ct = 0; 2931 b->ht_insert_ct = 0; 2932 2933 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2934 b->ijonly = PETSC_FALSE; 2935 2936 2937 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2938 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2940 #if defined(PETSC_HAVE_HYPRE) 2941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2942 #endif 2943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2944 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2945 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2947 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2948 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2949 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 2950 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2951 2952 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2953 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing 
matrix","MatSetOption",&flg);CHKERRQ(ierr); 2954 if (flg) { 2955 PetscReal fact = 1.39; 2956 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2957 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2958 if (fact <= 1.0) fact = 1.39; 2959 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2960 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2961 } 2962 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 /*MC 2967 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2968 2969 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2970 and MATMPIBAIJ otherwise. 2971 2972 Options Database Keys: 2973 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2974 2975 Level: beginner 2976 2977 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2978 M*/ 2979 2980 /*@C 2981 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2982 (block compressed row). For good matrix assembly performance 2983 the user should preallocate the matrix storage by setting the parameters 2984 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2985 performance can be increased by more than a factor of 50. 2986 2987 Collective on Mat 2988 2989 Input Parameters: 2990 + B - the matrix 2991 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2992 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2993 . d_nz - number of block nonzeros per block row in diagonal portion of local 2994 submatrix (same for all local rows) 2995 . d_nnz - array containing the number of block nonzeros in the various block rows 2996 of the in diagonal portion of the local (possibly different for each block 2997 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2998 set it even if it is zero. 2999 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3000 submatrix (same for all local rows). 3001 - o_nnz - array containing the number of nonzeros in the various block rows of the 3002 off-diagonal portion of the local submatrix (possibly different for 3003 each block row) or NULL. 3004 3005 If the *_nnz parameter is given then the *_nz parameter is ignored 3006 3007 Options Database Keys: 3008 + -mat_block_size - size of the blocks to use 3009 - -mat_use_hash_table <fact> 3010 3011 Notes: 3012 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3013 than it must be used on all processors that share the object for that argument. 3014 3015 Storage Information: 3016 For a square global matrix we define each processor's diagonal portion 3017 to be its local rows and the corresponding columns (a square submatrix); 3018 each processor's off-diagonal portion encompasses the remainder of the 3019 local matrix (a rectangular submatrix). 3020 3021 The user can specify preallocated storage for the diagonal part of 3022 the local submatrix with either d_nz or d_nnz (not both). Set 3023 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3024 memory allocation. 
Likewise, specify preallocated storage for the 3025 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3026 3027 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3028 the figure below we depict these three local rows and all columns (0-11). 3029 3030 .vb 3031 0 1 2 3 4 5 6 7 8 9 10 11 3032 -------------------------- 3033 row 3 |o o o d d d o o o o o o 3034 row 4 |o o o d d d o o o o o o 3035 row 5 |o o o d d d o o o o o o 3036 -------------------------- 3037 .ve 3038 3039 Thus, any entries in the d locations are stored in the d (diagonal) 3040 submatrix, and any entries in the o locations are stored in the 3041 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3042 stored simply in the MATSEQBAIJ format for compressed row storage. 3043 3044 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3045 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3046 In general, for PDE problems in which most nonzeros are near the diagonal, 3047 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3048 or you will get TERRIBLE performance; see the users' manual chapter on 3049 matrices. 3050 3051 You can call MatGetInfo() to get information on how effective the preallocation was; 3052 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3053 You can also run with the option -info and look for messages with the string 3054 malloc in them to see if additional memory allocation was needed. 3055 3056 Level: intermediate 3057 3058 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3059 @*/ 3060 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3061 { 3062 PetscErrorCode ierr; 3063 3064 PetscFunctionBegin; 3065 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3066 PetscValidType(B,1); 3067 PetscValidLogicalCollectiveInt(B,bs,2); 3068 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 3072 /*@C 3073 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3074 (block compressed row). For good matrix assembly performance 3075 the user should preallocate the matrix storage by setting the parameters 3076 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3077 performance can be increased by more than a factor of 50. 3078 3079 Collective 3080 3081 Input Parameters: 3082 + comm - MPI communicator 3083 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3084 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3085 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3086 This value should be the same as the local size used in creating the 3087 y vector for the matrix-vector product y = Ax. 3088 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3089 This value should be the same as the local size used in creating the 3090 x vector for the matrix-vector product y = Ax. 3091 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3092 . 
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor fact

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
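/*
   Example usage (editor's illustrative sketch, not part of the original source): creating a
   distributed BAIJ matrix in a single call.  The global size 100 and the nonzero estimates
   d_nz=5, o_nz=2 are made-up values.

.vb
    Mat A;
    ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,                 // block size 2
                         PETSC_DECIDE,PETSC_DECIDE,          // let PETSc choose the local sizes
                         100,100,                            // global rows and columns
                         5,NULL,2,NULL,                      // d_nz and o_nz block nonzeros per block row
                         &A);CHKERRQ(ierr);
.ve

   As the manual page above recommends, the MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation()
   sequence is usually preferred over calling this routine directly.
*/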
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
  mbs = m/bs; nbs = n/bs;

  /* read in row lengths and build row indices */
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);

  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);

  { /* preallocate matrix storage */
    PetscBT    bt;  /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht;  /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* both calls use PetscTryMethod() internally, so only the one matching the actual matrix type takes effect */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
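/*
   Example usage (editor's sketch, not taken from the original source): loading a MATMPIBAIJ matrix
   through the binary path above.  The file name "matrix.dat" and the block size 2 are illustrative;
   the block size can also come from the viewer's .info file as handled in MatLoad_MPIBAIJ_Binary().

.vb
    Mat         A;
    PetscViewer viewer;

    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatSetBlockSize(A,2);CHKERRQ(ierr);
    ierr = MatLoad(A,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
*/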
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
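/*
   Example usage (editor's sketch, not part of the original source): enabling the hash-table
   assembly path and tuning its size factor; the factor 1.6 is an arbitrary illustrative value.

.vb
    ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve
*/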
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}
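/*
   Example usage (editor's sketch, not part of the original source): accessing the local diagonal
   and off-diagonal SeqBAIJ parts and the mapping from local to global block columns.

.vb
    Mat            Ad,Ao;
    const PetscInt *garray;

    ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
    // Ad is the diagonal block, Ao the off-diagonal block;
    // garray[k] is the global block column of local block column k of Ao.
.ve
*/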
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat    - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes:
  This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool       roworiented  = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain the local rows
   in standard block CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, etc.  That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
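/*
   Example usage (editor's sketch, not part of the original source): a bs=1 matrix with two local
   rows.  The arrays shown are for the process owning global rows 0 and 1; all values are made up.

.vb
    PetscInt    i[3] = {0,2,3};          // row 0 has 2 blocks, row 1 has 1 block
    PetscInt    j[3] = {0,1,1};          // global (block) column indices
    PetscScalar a[3] = {2.0,-1.0,3.0};   // one value per 1x1 block
    Mat         A;

    ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
*/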
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
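/*
   Example usage (editor's sketch, not part of the original source): the routine above is the
   MPIBAIJ implementation normally reached through the type-independent
   MatCreateMPIMatConcatenateSeqMat(), which stacks each process's sequential matrix into one
   parallel matrix.  "seqA" below is an assumed, already-assembled MATSEQBAIJ matrix on each process.

.vb
    Mat C;
    ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
*/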