#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap is proportional to the global number of (block) columns.
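
     A sketch of the convention used below (illustrative values only, not part of the
   build): if this process' off-diagonal block columns are garray[] = {3,7,42} and
   bs = 2, then colmap[3] = 0*2+1, colmap[7] = 1*2+1, colmap[42] = 2*2+1 and every
   other entry stays 0, so for a global point column index gcol (an assumed name)

       col = baij->colmap[gcol/bs] - 1;      (negative means the block column is not in B on this rank)
       if (col >= 0) col += gcol % bs;       (point column offset within the block)

   recovers the local point column used when inserting into B. With PETSC_USE_CTABLE the
   same +1 convention is used through PetscTableAdd()/PetscTableFind() instead of a dense array.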
74 */ 75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 76 { 77 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 78 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 79 PetscErrorCode ierr; 80 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 81 82 PetscFunctionBegin; 83 #if defined(PETSC_USE_CTABLE) 84 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 85 for (i=0; i<nbs; i++) { 86 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 87 } 88 #else 89 ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 90 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 91 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 92 #endif 93 PetscFunctionReturn(0); 94 } 95 96 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 97 { \ 98 brow = row/bs; \ 99 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 100 rmax = aimax[brow]; nrow = ailen[brow]; \ 101 bcol = col/bs; \ 102 ridx = row % bs; cidx = col % bs; \ 103 low = 0; high = nrow; \ 104 while (high-low > 3) { \ 105 t = (low+high)/2; \ 106 if (rp[t] > bcol) high = t; \ 107 else low = t; \ 108 } \ 109 for (_i=low; _i<high; _i++) { \ 110 if (rp[_i] > bcol) break; \ 111 if (rp[_i] == bcol) { \ 112 bap = ap + bs2*_i + bs*cidx + ridx; \ 113 if (addv == ADD_VALUES) *bap += value; \ 114 else *bap = value; \ 115 goto a_noinsert; \ 116 } \ 117 } \ 118 if (a->nonew == 1) goto a_noinsert; \ 119 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 120 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 121 N = nrow++ - 1; \ 122 /* shift up all the later entries in this row */ \ 123 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 124 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 125 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 a_noinsert:; \ 129 ailen[brow] = nrow; \ 130 } 131 132 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 133 { \ 134 brow = row/bs; \ 135 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 136 rmax = bimax[brow]; nrow = bilen[brow]; \ 137 bcol = col/bs; \ 138 ridx = row % bs; cidx = col % bs; \ 139 low = 0; high = nrow; \ 140 while (high-low > 3) { \ 141 t = (low+high)/2; \ 142 if (rp[t] > bcol) high = t; \ 143 else low = t; \ 144 } \ 145 for (_i=low; _i<high; _i++) { \ 146 if (rp[_i] > bcol) break; \ 147 if (rp[_i] == bcol) { \ 148 bap = ap + bs2*_i + bs*cidx + ridx; \ 149 if (addv == ADD_VALUES) *bap += value; \ 150 else *bap = value; \ 151 goto b_noinsert; \ 152 } \ 153 } \ 154 if (b->nonew == 1) goto b_noinsert; \ 155 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 156 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 160 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 161 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 162 rp[_i] = bcol; \ 163 ap[bs2*_i + bs*cidx + ridx] = value; \ 164 b_noinsert:; \ 165 bilen[brow] = nrow; \ 166 } 167 168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 169 { 170 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 171 MatScalar value; 172 PetscBool roworiented = baij->roworiented; 173 PetscErrorCode ierr; 174 PetscInt i,j,row,col; 175 PetscInt rstart_orig=mat->rmap->rstart; 176 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 177 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 178 179 /* Some Variables required in the macro */ 180 Mat A = baij->A; 181 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 182 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 183 MatScalar *aa =a->a; 184 185 Mat B = baij->B; 186 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 187 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 188 MatScalar *ba =b->a; 189 190 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 191 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 192 MatScalar *ap,*bap; 193 194 PetscFunctionBegin; 195 for (i=0; i<m; i++) { 196 if (im[i] < 0) continue; 197 #if defined(PETSC_USE_DEBUG) 198 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 199 #endif 200 if (im[i] >= rstart_orig && im[i] < rend_orig) { 201 row = im[i] - rstart_orig; 202 for (j=0; j<n; j++) { 203 if (in[j] >= cstart_orig && in[j] < cend_orig) { 204 col = in[j] - cstart_orig; 205 if (roworiented) value = v[i*n+j]; 206 else value = v[i+j*m]; 207 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 208 } else if (in[j] < 0) continue; 209 #if defined(PETSC_USE_DEBUG) 210 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 211 #endif 212 else { 213 if (mat->was_assembled) { 214 if (!baij->colmap) { 215 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 216 } 217 #if defined(PETSC_USE_CTABLE) 218 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 219 col = col - 1; 220 #else 221 col = baij->colmap[in[j]/bs] - 1; 222 #endif 223 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 224 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 225 col = in[j]; 226 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 227 B = baij->B; 228 b = (Mat_SeqBAIJ*)(B)->data; 229 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 230 ba =b->a; 231 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 232 else col += in[j]%bs; 233 } else col = in[j]; 234 if (roworiented) value = v[i*n+j]; 235 else value = v[i+j*m]; 236 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 237 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 238 } 239 } 240 } else { 241 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 242 if (!baij->donotstash) { 243 mat->assembled = PETSC_FALSE; 244 if (roworiented) { 245 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 246 } else { 247 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 256 { 257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 
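  /*
     Layout note (a sketch assuming bs = 2; not build code): each bs x bs block is stored
     contiguously in column-major order, so the row-oriented input block

         v[] = {v0,v1,
                v2,v3}

     lands in memory as {v0,v2,v1,v3}. This is why the row-oriented branches below step
     through bap[jj] with jj = ii, ii+bs, ii+2*bs, ... while consuming v[] sequentially.
  */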
258 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 259 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 260 PetscErrorCode ierr; 261 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 262 PetscBool roworiented=a->roworiented; 263 const PetscScalar *value = v; 264 MatScalar *ap,*aa = a->a,*bap; 265 266 PetscFunctionBegin; 267 rp = aj + ai[row]; 268 ap = aa + bs2*ai[row]; 269 rmax = imax[row]; 270 nrow = ailen[row]; 271 value = v; 272 low = 0; 273 high = nrow; 274 while (high-low > 7) { 275 t = (low+high)/2; 276 if (rp[t] > col) high = t; 277 else low = t; 278 } 279 for (i=low; i<high; i++) { 280 if (rp[i] > col) break; 281 if (rp[i] == col) { 282 bap = ap + bs2*i; 283 if (roworiented) { 284 if (is == ADD_VALUES) { 285 for (ii=0; ii<bs; ii++) { 286 for (jj=ii; jj<bs2; jj+=bs) { 287 bap[jj] += *value++; 288 } 289 } 290 } else { 291 for (ii=0; ii<bs; ii++) { 292 for (jj=ii; jj<bs2; jj+=bs) { 293 bap[jj] = *value++; 294 } 295 } 296 } 297 } else { 298 if (is == ADD_VALUES) { 299 for (ii=0; ii<bs; ii++,value+=bs) { 300 for (jj=0; jj<bs; jj++) { 301 bap[jj] += value[jj]; 302 } 303 bap += bs; 304 } 305 } else { 306 for (ii=0; ii<bs; ii++,value+=bs) { 307 for (jj=0; jj<bs; jj++) { 308 bap[jj] = value[jj]; 309 } 310 bap += bs; 311 } 312 } 313 } 314 goto noinsert2; 315 } 316 } 317 if (nonew == 1) goto noinsert2; 318 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 319 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 320 N = nrow++ - 1; high++; 321 /* shift up all the later entries in this row */ 322 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 323 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 324 rp[i] = col; 325 bap = ap + bs2*i; 326 if (roworiented) { 327 for (ii=0; ii<bs; ii++) { 328 for (jj=ii; jj<bs2; jj+=bs) { 329 bap[jj] = *value++; 330 } 331 } 332 } else { 333 for (ii=0; ii<bs; ii++) { 334 for (jj=0; jj<bs; jj++) { 335 *bap++ = *value++; 336 } 337 } 338 } 339 noinsert2:; 340 ailen[row] = nrow; 341 PetscFunctionReturn(0); 342 } 343 344 /* 345 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 346 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 347 */ 348 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 349 { 350 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 351 const PetscScalar *value; 352 MatScalar *barray = baij->barray; 353 PetscBool roworiented = baij->roworiented; 354 PetscErrorCode ierr; 355 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 356 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 357 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 358 359 PetscFunctionBegin; 360 if (!barray) { 361 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 362 baij->barray = barray; 363 } 364 365 if (roworiented) stepval = (n-1)*bs; 366 else stepval = (m-1)*bs; 367 368 for (i=0; i<m; i++) { 369 if (im[i] < 0) continue; 370 #if defined(PETSC_USE_DEBUG) 371 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 372 #endif 373 if (im[i] >= rstart && im[i] < rend) { 374 row = im[i] - rstart; 375 for (j=0; j<n; j++) { 376 /* If NumCol = 1 then a copy is not required */ 377 if ((roworiented) && (n == 1)) { 378 
barray = (MatScalar*)v + i*bs2; 379 } else if ((!roworiented) && (m == 1)) { 380 barray = (MatScalar*)v + j*bs2; 381 } else { /* Here a copy is required */ 382 if (roworiented) { 383 value = v + (i*(stepval+bs) + j)*bs; 384 } else { 385 value = v + (j*(stepval+bs) + i)*bs; 386 } 387 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 388 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 389 barray += bs; 390 } 391 barray -= bs2; 392 } 393 394 if (in[j] >= cstart && in[j] < cend) { 395 col = in[j] - cstart; 396 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 397 } else if (in[j] < 0) continue; 398 #if defined(PETSC_USE_DEBUG) 399 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 400 #endif 401 else { 402 if (mat->was_assembled) { 403 if (!baij->colmap) { 404 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 405 } 406 407 #if defined(PETSC_USE_DEBUG) 408 #if defined(PETSC_USE_CTABLE) 409 { PetscInt data; 410 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 411 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 412 } 413 #else 414 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 415 #endif 416 #endif 417 #if defined(PETSC_USE_CTABLE) 418 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 419 col = (col - 1)/bs; 420 #else 421 col = (baij->colmap[in[j]] - 1)/bs; 422 #endif 423 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 424 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 425 col = in[j]; 426 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 427 } else col = in[j]; 428 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 429 } 430 } 431 } else { 432 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 433 if (!baij->donotstash) { 434 if (roworiented) { 435 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 436 } else { 437 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 438 } 439 } 440 } 441 } 442 PetscFunctionReturn(0); 443 } 444 445 #define HASH_KEY 0.6180339887 446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 450 { 451 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 452 PetscBool roworiented = baij->roworiented; 453 PetscErrorCode ierr; 454 PetscInt i,j,row,col; 455 PetscInt rstart_orig=mat->rmap->rstart; 456 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 457 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 458 PetscReal tmp; 459 MatScalar **HD = baij->hd,value; 460 #if defined(PETSC_USE_DEBUG) 461 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 462 #endif 463 464 PetscFunctionBegin; 465 for (i=0; i<m; i++) { 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] < 0) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 468 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 469 #endif 470 row = im[i]; 471 if (row >= rstart_orig && row < rend_orig) { 472 for (j=0; j<n; j++) { 473 col = in[j]; 474 if (roworiented) value = v[i*n+j]; 475 else value = v[i+j*m]; 476 /* Look up PetscInto the Hash Table */ 477 key = (row/bs)*Nbs+(col/bs)+1; 478 h1 = HASH(size,key,tmp); 479 480 481 idx = h1; 482 #if defined(PETSC_USE_DEBUG) 483 insert_ct++; 484 total_ct++; 485 if (HT[idx] != key) { 486 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 487 if (idx == size) { 488 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 489 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 490 } 491 } 492 #else 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #endif 501 /* A HASH table entry is found, so insert the values at the correct address */ 502 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 503 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 504 } 505 } else if (!baij->donotstash) { 506 if (roworiented) { 507 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 508 } else { 509 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 510 } 511 } 512 } 513 #if defined(PETSC_USE_DEBUG) 514 baij->ht_total_ct += total_ct; 515 baij->ht_insert_ct += insert_ct; 516 #endif 517 PetscFunctionReturn(0); 518 } 519 520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 521 { 522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 523 PetscBool roworiented = baij->roworiented; 524 PetscErrorCode ierr; 525 PetscInt i,j,ii,jj,row,col; 526 PetscInt rstart=baij->rstartbs; 527 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 528 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 529 PetscReal tmp; 530 MatScalar **HD = baij->hd,*baij_a; 531 const PetscScalar *v_t,*value; 532 #if defined(PETSC_USE_DEBUG) 533 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 534 #endif 535 536 PetscFunctionBegin; 537 if (roworiented) stepval = (n-1)*bs; 538 else stepval = (m-1)*bs; 539 540 for (i=0; i<m; i++) { 541 #if defined(PETSC_USE_DEBUG) 542 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 543 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 544 #endif 545 row = im[i]; 546 v_t = v + i*nbs2; 547 if (row >= rstart && row < rend) { 548 for (j=0; j<n; j++) { 549 col = in[j]; 550 551 /* Look up into the Hash Table */ 552 key = row*Nbs+col+1; 553 h1 = HASH(size,key,tmp); 554 555 idx = h1; 556 #if defined(PETSC_USE_DEBUG) 557 total_ct++; 558 insert_ct++; 559 if (HT[idx] != key) { 560 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 561 if (idx == size) { 562 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 563 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in 
the hash table", row, col); 564 } 565 } 566 #else 567 if (HT[idx] != key) { 568 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 569 if (idx == size) { 570 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 571 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 572 } 573 } 574 #endif 575 baij_a = HD[idx]; 576 if (roworiented) { 577 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 578 /* value = v + (i*(stepval+bs)+j)*bs; */ 579 value = v_t; 580 v_t += bs; 581 if (addv == ADD_VALUES) { 582 for (ii=0; ii<bs; ii++,value+=stepval) { 583 for (jj=ii; jj<bs2; jj+=bs) { 584 baij_a[jj] += *value++; 585 } 586 } 587 } else { 588 for (ii=0; ii<bs; ii++,value+=stepval) { 589 for (jj=ii; jj<bs2; jj+=bs) { 590 baij_a[jj] = *value++; 591 } 592 } 593 } 594 } else { 595 value = v + j*(stepval+bs)*bs + i*bs; 596 if (addv == ADD_VALUES) { 597 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 598 for (jj=0; jj<bs; jj++) { 599 baij_a[jj] += *value++; 600 } 601 } 602 } else { 603 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 604 for (jj=0; jj<bs; jj++) { 605 baij_a[jj] = *value++; 606 } 607 } 608 } 609 } 610 } 611 } else { 612 if (!baij->donotstash) { 613 if (roworiented) { 614 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 615 } else { 616 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 617 } 618 } 619 } 620 } 621 #if defined(PETSC_USE_DEBUG) 622 baij->ht_total_ct += total_ct; 623 baij->ht_insert_ct += insert_ct; 624 #endif 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 633 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 640 row = idxm[i] - bsrstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= bscstart && idxn[j] < bscend) { 645 col = idxn[j] - bscstart; 646 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!baij->colmap) { 649 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 653 data--; 654 #else 655 data = baij->colmap[idxn[j]/bs]-1; 656 #endif 657 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 658 else { 659 col = data + idxn[j]%bs; 660 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 670 { 671 
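  /*
     Reminder of the quantities computed below (standard norm definitions):
       NORM_FROBENIUS : sqrt(sum_ij |a_ij|^2)  -- local sums over the A and B parts, reduced with MPIU_SUM
       NORM_1         : max_j sum_i |a_ij|     -- per-column sums, reduced with MPIU_SUM, then the maximum is taken
       NORM_INFINITY  : max_i sum_j |a_ij|     -- per-row sums, local maximum, reduced with MPIU_MAX
  */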
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table used by the MatSetValues*_MPIBAIJ_HT() routines.
  The table is created only once; if new entries need to be added to the matrix,
  the hash table has to be destroyed and recreated.
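
  The table maps key = (global block row)*Nbs + (global block col) + 1 to the address
  of the corresponding bs*bs block of values. A sketch of the scheme used below: the
  starting slot is chosen with the multiplicative hash

      h1 = (PetscInt)(ht_size * frac(key * 0.6180339887))

  and collisions are resolved by linear probing from h1, wrapping around the table;
  a stored key of 0 marks an empty slot, which is why 1 is added to every key.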
768 */ 769 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 770 { 771 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 772 Mat A = baij->A,B=baij->B; 773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 774 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 775 PetscErrorCode ierr; 776 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 777 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 778 PetscInt *HT,key; 779 MatScalar **HD; 780 PetscReal tmp; 781 #if defined(PETSC_USE_INFO) 782 PetscInt ct=0,max=0; 783 #endif 784 785 PetscFunctionBegin; 786 if (baij->ht) PetscFunctionReturn(0); 787 788 baij->ht_size = (PetscInt)(factor*nz); 789 ht_size = baij->ht_size; 790 791 /* Allocate Memory for Hash Table */ 792 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 793 HD = baij->hd; 794 HT = baij->ht; 795 796 /* Loop Over A */ 797 for (i=0; i<a->mbs; i++) { 798 for (j=ai[i]; j<ai[i+1]; j++) { 799 row = i+rstart; 800 col = aj[j]+cstart; 801 802 key = row*Nbs + col + 1; 803 h1 = HASH(ht_size,key,tmp); 804 for (k=0; k<ht_size; k++) { 805 if (!HT[(h1+k)%ht_size]) { 806 HT[(h1+k)%ht_size] = key; 807 HD[(h1+k)%ht_size] = a->a + j*bs2; 808 break; 809 #if defined(PETSC_USE_INFO) 810 } else { 811 ct++; 812 #endif 813 } 814 } 815 #if defined(PETSC_USE_INFO) 816 if (k> max) max = k; 817 #endif 818 } 819 } 820 /* Loop Over B */ 821 for (i=0; i<b->mbs; i++) { 822 for (j=bi[i]; j<bi[i+1]; j++) { 823 row = i+rstart; 824 col = garray[bj[j]]; 825 key = row*Nbs + col + 1; 826 h1 = HASH(ht_size,key,tmp); 827 for (k=0; k<ht_size; k++) { 828 if (!HT[(h1+k)%ht_size]) { 829 HT[(h1+k)%ht_size] = key; 830 HD[(h1+k)%ht_size] = b->a + j*bs2; 831 break; 832 #if defined(PETSC_USE_INFO) 833 } else { 834 ct++; 835 #endif 836 } 837 } 838 #if defined(PETSC_USE_INFO) 839 if (k> max) max = k; 840 #endif 841 } 842 } 843 844 /* Print Summary */ 845 #if defined(PETSC_USE_INFO) 846 for (i=0,j=0; i<ht_size; i++) { 847 if (HT[i]) j++; 848 } 849 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 850 #endif 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 855 { 856 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 857 PetscErrorCode ierr; 858 PetscInt nstash,reallocs; 859 860 PetscFunctionBegin; 861 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 862 863 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 864 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 865 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 866 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 867 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 868 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 873 { 874 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 875 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 876 PetscErrorCode ierr; 877 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 878 PetscInt *row,*col; 879 PetscBool r1,r2,r3,other_disassembled; 880 MatScalar *val; 881 PetscMPIInt n; 882 883 PetscFunctionBegin; 884 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 885 if (!baij->donotstash && !mat->nooffprocentries) { 886 while (1) { 887 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 888 if (!flg) break; 889 890 for (i=0; i<n;) { 891 /* Now identify the consecutive vals belonging to the same row */ 892 for (j=i,rstart=row[j]; j<n; j++) { 893 if (row[j] != rstart) break; 894 } 895 if (j < n) ncols = j-i; 896 else ncols = n-i; 897 /* Now assemble all these values with a single function call */ 898 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 903 /* Now process the block-stash. Since the values are stashed column-oriented, 904 set the roworiented flag to column oriented, and after MatSetValues() 905 restore the original flags */ 906 r1 = baij->roworiented; 907 r2 = a->roworiented; 908 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 909 910 baij->roworiented = PETSC_FALSE; 911 a->roworiented = PETSC_FALSE; 912 913 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 914 while (1) { 915 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 916 if (!flg) break; 917 918 for (i=0; i<n;) { 919 /* Now identify the consecutive vals belonging to the same row */ 920 for (j=i,rstart=row[j]; j<n; j++) { 921 if (row[j] != rstart) break; 922 } 923 if (j < n) ncols = j-i; 924 else ncols = n-i; 925 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 926 i = j; 927 } 928 } 929 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 930 931 baij->roworiented = r1; 932 a->roworiented = r2; 933 934 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 935 } 936 937 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 938 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 939 940 /* determine if any processor has disassembled, if so we must 941 also disassemble ourselfs, in order that we may reassemble. 
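
     The check below relies on MPI_PROD acting as a logical AND over mat->was_assembled:
   if any rank has already disassembled (its was_assembled is PETSC_FALSE), the reduced
   value other_disassembled comes back PETSC_FALSE and the still-assembled ranks call
   MatDisAssemble_MPIBAIJ() as well, so every rank rebuilds its off-process column map
   consistently before reassembly.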
*/ 942 /* 943 if nonzero structure of submatrix B cannot change then we know that 944 no processor disassembled thus we can skip this stuff 945 */ 946 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 947 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 948 if (mat->was_assembled && !other_disassembled) { 949 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 950 } 951 } 952 953 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 954 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 955 } 956 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 957 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 958 959 #if defined(PETSC_USE_INFO) 960 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 961 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 962 963 baij->ht_total_ct = 0; 964 baij->ht_insert_ct = 0; 965 } 966 #endif 967 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 968 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 969 970 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 971 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 972 } 973 974 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 975 976 baij->rowvalues = 0; 977 978 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 979 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 980 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 981 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 987 #include <petscdraw.h> 988 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 989 { 990 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 991 PetscErrorCode ierr; 992 PetscMPIInt rank = baij->rank; 993 PetscInt bs = mat->rmap->bs; 994 PetscBool iascii,isdraw; 995 PetscViewer sviewer; 996 PetscViewerFormat format; 997 998 PetscFunctionBegin; 999 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1000 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1001 if (iascii) { 1002 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1003 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1004 MatInfo info; 1005 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1006 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1007 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1008 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n", 1009 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1010 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1011 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1012 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1014 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1015 ierr = 
PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1016 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1017 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1020 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1023 PetscFunctionReturn(0); 1024 } 1025 } 1026 1027 if (isdraw) { 1028 PetscDraw draw; 1029 PetscBool isnull; 1030 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1031 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1032 if (isnull) PetscFunctionReturn(0); 1033 } 1034 1035 { 1036 /* assemble the entire matrix onto first processor. */ 1037 Mat A; 1038 Mat_SeqBAIJ *Aloc; 1039 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1040 MatScalar *a; 1041 const char *matname; 1042 1043 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1044 /* Perhaps this should be the type of mat? */ 1045 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1046 if (!rank) { 1047 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1048 } else { 1049 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1050 } 1051 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1052 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1053 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1054 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1055 1056 /* copy over the A part */ 1057 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1058 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1059 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1060 1061 for (i=0; i<mbs; i++) { 1062 rvals[0] = bs*(baij->rstartbs + i); 1063 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1064 for (j=ai[i]; j<ai[i+1]; j++) { 1065 col = (baij->cstartbs+aj[j])*bs; 1066 for (k=0; k<bs; k++) { 1067 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1068 col++; a += bs; 1069 } 1070 } 1071 } 1072 /* copy over the B part */ 1073 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1074 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1075 for (i=0; i<mbs; i++) { 1076 rvals[0] = bs*(baij->rstartbs + i); 1077 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1078 for (j=ai[i]; j<ai[i+1]; j++) { 1079 col = baij->garray[aj[j]]*bs; 1080 for (k=0; k<bs; k++) { 1081 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1082 col++; a += bs; 1083 } 1084 } 1085 } 1086 ierr = PetscFree(rvals);CHKERRQ(ierr); 1087 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1088 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1089 /* 1090 Everyone has to call to draw the matrix since the graphics waits are 1091 synchronized across all processors that share the PetscDraw object 1092 */ 1093 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1094 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1095 if (!rank) { 1096 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1097 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1098 } 1099 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1100 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A);CHKERRQ(ierr); 
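    /*
       Usage sketch (assumed caller code, not part of this file): this ASCII/draw/socket
       path is reached through MatView() on an assembled MATMPIBAIJ matrix C, e.g.

           ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
           ierr = MatView(C,PETSC_VIEWER_DRAW_(PetscObjectComm((PetscObject)C)));CHKERRQ(ierr);

       Since the block above gathers the entire matrix onto rank 0, it is intended for
       small matrices and debugging rather than production-size output.
    */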
1102 } 1103 PetscFunctionReturn(0); 1104 } 1105 1106 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1107 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1108 { 1109 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1110 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1111 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1112 const PetscInt *garray = aij->garray; 1113 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1114 PetscInt *rowlens,*colidxs; 1115 PetscScalar *matvals; 1116 PetscErrorCode ierr; 1117 1118 PetscFunctionBegin; 1119 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1120 1121 M = mat->rmap->N; 1122 N = mat->cmap->N; 1123 m = mat->rmap->n; 1124 rs = mat->rmap->rstart; 1125 cs = mat->cmap->rstart; 1126 bs = mat->rmap->bs; 1127 nz = bs*bs*(A->nz + B->nz); 1128 1129 /* write matrix header */ 1130 header[0] = MAT_FILE_CLASSID; 1131 header[1] = M; header[2] = N; header[3] = nz; 1132 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1133 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1134 1135 /* fill in and store row lengths */ 1136 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1137 for (cnt=0, i=0; i<A->mbs; i++) 1138 for (j=0; j<bs; j++) 1139 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1140 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1141 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1142 1143 /* fill in and store column indices */ 1144 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1145 for (cnt=0, i=0; i<A->mbs; i++) { 1146 for (k=0; k<bs; k++) { 1147 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1148 if (garray[B->j[jb]] > cs/bs) break; 1149 for (l=0; l<bs; l++) 1150 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1151 } 1152 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1153 for (l=0; l<bs; l++) 1154 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1155 for (; jb<B->i[i+1]; jb++) 1156 for (l=0; l<bs; l++) 1157 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1158 } 1159 } 1160 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1161 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1162 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1163 1164 /* fill in and store nonzero values */ 1165 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1166 for (cnt=0, i=0; i<A->mbs; i++) { 1167 for (k=0; k<bs; k++) { 1168 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1169 if (garray[B->j[jb]] > cs/bs) break; 1170 for (l=0; l<bs; l++) 1171 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1172 } 1173 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1174 for (l=0; l<bs; l++) 1175 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1176 for (; jb<B->i[i+1]; jb++) 1177 for (l=0; l<bs; l++) 1178 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1179 } 1180 } 1181 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1182 ierr = PetscFree(matvals);CHKERRQ(ierr); 1183 1184 /* write block size option to the viewer's .info file */ 1185 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1190 { 1191 PetscErrorCode ierr; 1192 PetscBool iascii,isdraw,issocket,isbinary; 1193 1194 PetscFunctionBegin; 1195 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1196 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr =
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1269 { 1270 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1282 { 1283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 /* do nondiagonal part */ 1288 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1289 /* do local part */ 1290 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1291 /* add partial results together */ 1292 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1293 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 /* do nondiagonal part */ 1304 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1305 /* do local part */ 1306 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1307 /* add partial results together */ 1308 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1309 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* 1314 This only works correctly for square matrices where the subblock A->A is the 1315 diagonal block 1316 */ 1317 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1318 { 1319 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1324 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1335 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1342 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1343 PetscErrorCode ierr; 1344 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1345 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1346 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1347 1348 PetscFunctionBegin; 1349 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1350 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1351 mat->getrowactive = PETSC_TRUE; 1352 1353 if 
(!mat->rowvalues && (idx || v)) { 1354 /* 1355 allocate enough space to hold information from the longest row. 1356 */ 1357 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1358 PetscInt max = 1,mbs = mat->mbs,tmp; 1359 for (i=0; i<mbs; i++) { 1360 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1361 if (max < tmp) max = tmp; 1362 } 1363 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1364 } 1365 lrow = row - brstart; 1366 1367 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1368 if (!v) {pvA = 0; pvB = 0;} 1369 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1370 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1371 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1372 nztot = nzA + nzB; 1373 1374 cmap = mat->garray; 1375 if (v || idx) { 1376 if (nztot) { 1377 /* Sort by increasing column numbers, assuming A and B already sorted */ 1378 PetscInt imark = -1; 1379 if (v) { 1380 *v = v_p = mat->rowvalues; 1381 for (i=0; i<nzB; i++) { 1382 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1383 else break; 1384 } 1385 imark = i; 1386 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1387 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1388 } 1389 if (idx) { 1390 *idx = idx_p = mat->rowindices; 1391 if (imark > -1) { 1392 for (i=0; i<imark; i++) { 1393 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1394 } 1395 } else { 1396 for (i=0; i<nzB; i++) { 1397 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1398 else break; 1399 } 1400 imark = i; 1401 } 1402 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1403 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1404 } 1405 } else { 1406 if (idx) *idx = 0; 1407 if (v) *v = 0; 1408 } 1409 } 1410 *nz = nztot; 1411 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1412 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1419 1420 PetscFunctionBegin; 1421 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1422 baij->getrowactive = PETSC_FALSE; 1423 PetscFunctionReturn(0); 1424 } 1425 1426 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1427 { 1428 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1438 { 1439 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1440 Mat A = a->A,B = a->B; 1441 PetscErrorCode ierr; 1442 PetscLogDouble isend[5],irecv[5]; 1443 1444 PetscFunctionBegin; 1445 info->block_size = (PetscReal)matin->rmap->bs; 1446 1447 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1448 1449 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1450 isend[3] = info->memory; isend[4] = info->mallocs; 1451 1452 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1453 1454 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1455 isend[3] += info->memory; isend[4] += info->mallocs; 1456 1457 if (flag == MAT_LOCAL) { 1458 info->nz_used = 
isend[0]; 1459 info->nz_allocated = isend[1]; 1460 info->nz_unneeded = isend[2]; 1461 info->memory = isend[3]; 1462 info->mallocs = isend[4]; 1463 } else if (flag == MAT_GLOBAL_MAX) { 1464 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1465 1466 info->nz_used = irecv[0]; 1467 info->nz_allocated = irecv[1]; 1468 info->nz_unneeded = irecv[2]; 1469 info->memory = irecv[3]; 1470 info->mallocs = irecv[4]; 1471 } else if (flag == MAT_GLOBAL_SUM) { 1472 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1473 1474 info->nz_used = irecv[0]; 1475 info->nz_allocated = irecv[1]; 1476 info->nz_unneeded = irecv[2]; 1477 info->memory = irecv[3]; 1478 info->mallocs = irecv[4]; 1479 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1480 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1481 info->fill_ratio_needed = 0; 1482 info->factor_mallocs = 0; 1483 PetscFunctionReturn(0); 1484 } 1485 1486 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1487 { 1488 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1489 PetscErrorCode ierr; 1490 1491 PetscFunctionBegin; 1492 switch (op) { 1493 case MAT_NEW_NONZERO_LOCATIONS: 1494 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1495 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1496 case MAT_KEEP_NONZERO_PATTERN: 1497 case MAT_NEW_NONZERO_LOCATION_ERR: 1498 MatCheckPreallocated(A,1); 1499 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1500 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1501 break; 1502 case MAT_ROW_ORIENTED: 1503 MatCheckPreallocated(A,1); 1504 a->roworiented = flg; 1505 1506 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1507 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1508 break; 1509 case MAT_NEW_DIAGONALS: 1510 case MAT_SORTED_FULL: 1511 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1512 break; 1513 case MAT_IGNORE_OFF_PROC_ENTRIES: 1514 a->donotstash = flg; 1515 break; 1516 case MAT_USE_HASH_TABLE: 1517 a->ht_flag = flg; 1518 a->ht_fact = 1.39; 1519 break; 1520 case MAT_SYMMETRIC: 1521 case MAT_STRUCTURALLY_SYMMETRIC: 1522 case MAT_HERMITIAN: 1523 case MAT_SUBMAT_SINGLEIS: 1524 case MAT_SYMMETRY_ETERNAL: 1525 MatCheckPreallocated(A,1); 1526 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1527 break; 1528 default: 1529 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1535 { 1536 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1537 Mat_SeqBAIJ *Aloc; 1538 Mat B; 1539 PetscErrorCode ierr; 1540 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1541 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1542 MatScalar *a; 1543 1544 PetscFunctionBegin; 1545 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1546 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1547 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1548 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1549 /* Do not know preallocation information, but must set block size */ 1550 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1551 } else { 1552 B = *matout; 1553 } 1554 1555 /* copy over the A part */ 1556 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1557 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1558 ierr = 
PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1559 1560 for (i=0; i<mbs; i++) { 1561 rvals[0] = bs*(baij->rstartbs + i); 1562 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1563 for (j=ai[i]; j<ai[i+1]; j++) { 1564 col = (baij->cstartbs+aj[j])*bs; 1565 for (k=0; k<bs; k++) { 1566 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1567 1568 col++; a += bs; 1569 } 1570 } 1571 } 1572 /* copy over the B part */ 1573 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1574 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1575 for (i=0; i<mbs; i++) { 1576 rvals[0] = bs*(baij->rstartbs + i); 1577 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 col = baij->garray[aj[j]]*bs; 1580 for (k=0; k<bs; k++) { 1581 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1582 col++; 1583 a += bs; 1584 } 1585 } 1586 } 1587 ierr = PetscFree(rvals);CHKERRQ(ierr); 1588 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1589 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1590 1591 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1592 else { 1593 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1599 { 1600 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1601 Mat a = baij->A,b = baij->B; 1602 PetscErrorCode ierr; 1603 PetscInt s1,s2,s3; 1604 1605 PetscFunctionBegin; 1606 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1607 if (rr) { 1608 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1609 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1610 /* Overlap communication with computation. */ 1611 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 } 1613 if (ll) { 1614 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1615 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1616 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1617 } 1618 /* scale the diagonal block */ 1619 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1620 1621 if (rr) { 1622 /* Do a scatter end and then right scale the off-diagonal block */ 1623 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1630 { 1631 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1632 PetscInt *lrows; 1633 PetscInt r, len; 1634 PetscBool cong; 1635 PetscErrorCode ierr; 1636 1637 PetscFunctionBegin; 1638 /* get locally owned rows */ 1639 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1640 /* fix right hand side if needed */ 1641 if (x && b) { 1642 const PetscScalar *xx; 1643 PetscScalar *bb; 1644 1645 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1646 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1647 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1648 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1649 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1650 } 1651 1652 /* actually zap the local rows */ 1653 /* 1654 Zero the required rows. 
If the "diagonal block" of the matrix 1655 is square and the user wishes to set the diagonal we use separate 1656 code so that MatSetValues() is not called for each diagonal allocating 1657 new memory, thus calling lots of mallocs and slowing things down. 1658 1659 */ 1660 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1661 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1662 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1663 if ((diag != 0.0) && cong) { 1664 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1665 } else if (diag != 0.0) { 1666 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1667 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1668 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1669 for (r = 0; r < len; ++r) { 1670 const PetscInt row = lrows[r] + A->rmap->rstart; 1671 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1672 } 1673 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1674 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1675 } else { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1677 } 1678 ierr = PetscFree(lrows);CHKERRQ(ierr); 1679 1680 /* only change matrix nonzero state if pattern was allowed to be changed */ 1681 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1682 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1683 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1689 { 1690 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 PetscMPIInt n = A->rmap->n,p = 0; 1693 PetscInt i,j,k,r,len = 0,row,col,count; 1694 PetscInt *lrows,*owners = A->rmap->range; 1695 PetscSFNode *rrows; 1696 PetscSF sf; 1697 const PetscScalar *xx; 1698 PetscScalar *bb,*mask; 1699 Vec xmask,lmask; 1700 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1701 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1702 PetscScalar *aa; 1703 1704 PetscFunctionBegin; 1705 /* Create SF where leaves are input rows and roots are owned rows */ 1706 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1707 for (r = 0; r < n; ++r) lrows[r] = -1; 1708 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1709 for (r = 0; r < N; ++r) { 1710 const PetscInt idx = rows[r]; 1711 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1712 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1713 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1714 } 1715 rrows[r].rank = p; 1716 rrows[r].index = rows[r] - owners[p]; 1717 } 1718 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1719 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1720 /* Collect flags for rows to be zeroed */ 1721 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1722 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1723 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1724 /* Compress 
and put in row numbers */ 1725 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1726 /* zero diagonal part of matrix */ 1727 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1728 /* handle off diagonal part of matrix */ 1729 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1730 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1731 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1732 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1733 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1737 if (x) { 1738 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1740 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1741 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1742 } 1743 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1744 /* remove zeroed rows of off diagonal matrix */ 1745 for (i = 0; i < len; ++i) { 1746 row = lrows[i]; 1747 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1748 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1749 for (k = 0; k < count; ++k) { 1750 aa[0] = 0.0; 1751 aa += bs; 1752 } 1753 } 1754 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1755 for (i = 0; i < l->B->rmap->N; ++i) { 1756 row = i/bs; 1757 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1758 for (k = 0; k < bs; ++k) { 1759 col = bs*baij->j[j] + k; 1760 if (PetscAbsScalar(mask[col])) { 1761 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1762 if (x) bb[i] -= aa[0]*xx[col]; 1763 aa[0] = 0.0; 1764 } 1765 } 1766 } 1767 } 1768 if (x) { 1769 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1770 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1771 } 1772 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1773 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1774 ierr = PetscFree(lrows);CHKERRQ(ierr); 1775 1776 /* only change matrix nonzero state if pattern was allowed to be changed */ 1777 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1778 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1779 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1785 { 1786 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1795 1796 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1797 { 1798 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1799 Mat a,b,c,d; 1800 PetscBool flg; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 a = matA->A; b = matA->B; 1805 c = matB->A; d = matB->B; 1806 1807 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1808 if (flg) { 1809 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1810 } 1811 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1816 { 1817 PetscErrorCode ierr; 1818 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1819 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1820 1821 PetscFunctionBegin; 1822 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1823 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1824 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1825 } else { 1826 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1827 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1828 } 1829 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 1833 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1834 { 1835 PetscErrorCode ierr; 1836 1837 PetscFunctionBegin; 1838 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1843 { 1844 PetscErrorCode ierr; 1845 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1846 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1847 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1848 1849 PetscFunctionBegin; 1850 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1855 { 1856 PetscErrorCode ierr; 1857 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1858 PetscBLASInt bnz,one=1; 1859 Mat_SeqBAIJ *x,*y; 1860 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1861 1862 PetscFunctionBegin; 1863 if (str == SAME_NONZERO_PATTERN) { 1864 PetscScalar alpha = a; 1865 x = (Mat_SeqBAIJ*)xx->A->data; 1866 y = (Mat_SeqBAIJ*)yy->A->data; 1867 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1868 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1869 x = (Mat_SeqBAIJ*)xx->B->data; 1870 y = (Mat_SeqBAIJ*)yy->B->data; 1871 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1872 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1873 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1874 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1875 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1876 } else { 1877 Mat B; 1878 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1879 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1881 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1882 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1883 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1884 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1885 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1886 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1888 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1889 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1890 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1891 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1893 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1905 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1910 { 1911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1916 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1921 { 1922 PetscErrorCode ierr; 1923 IS iscol_local; 1924 PetscInt csize; 1925 1926 PetscFunctionBegin; 1927 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1928 if (call == MAT_REUSE_MATRIX) { 1929 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1930 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1931 } else { 1932 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1933 } 1934 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1935 if (call == MAT_INITIAL_MATRIX) { 1936 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1937 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 /* 1943 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1944 in local and then by concatenating the local matrices the end result. 1945 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1946 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1947 */ 1948 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1949 { 1950 PetscErrorCode ierr; 1951 PetscMPIInt rank,size; 1952 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1953 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1954 Mat M,Mreuse; 1955 MatScalar *vwork,*aa; 1956 MPI_Comm comm; 1957 IS isrow_new, iscol_new; 1958 Mat_SeqBAIJ *aij; 1959 1960 PetscFunctionBegin; 1961 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1962 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1964 /* The compression and expansion should be avoided. 
Doesn't point 1965 out errors, might change the indices, hence buggy */ 1966 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1967 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1968 1969 if (call == MAT_REUSE_MATRIX) { 1970 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1971 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1972 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 1973 } else { 1974 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 1975 } 1976 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1977 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1978 /* 1979 m - number of local rows 1980 n - number of columns (same on all processors) 1981 rstart - first row in new global matrix generated 1982 */ 1983 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1984 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1985 m = m/bs; 1986 n = n/bs; 1987 1988 if (call == MAT_INITIAL_MATRIX) { 1989 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1990 ii = aij->i; 1991 jj = aij->j; 1992 1993 /* 1994 Determine the number of non-zeros in the diagonal and off-diagonal 1995 portions of the matrix in order to do correct preallocation 1996 */ 1997 1998 /* first get start and end of "diagonal" columns */ 1999 if (csize == PETSC_DECIDE) { 2000 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2001 if (mglobal == n*bs) { /* square matrix */ 2002 nlocal = m; 2003 } else { 2004 nlocal = n/size + ((n % size) > rank); 2005 } 2006 } else { 2007 nlocal = csize/bs; 2008 } 2009 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2010 rstart = rend - nlocal; 2011 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2012 2013 /* next, compute all the lengths */ 2014 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2015 for (i=0; i<m; i++) { 2016 jend = ii[i+1] - ii[i]; 2017 olen = 0; 2018 dlen = 0; 2019 for (j=0; j<jend; j++) { 2020 if (*jj < rstart || *jj >= rend) olen++; 2021 else dlen++; 2022 jj++; 2023 } 2024 olens[i] = olen; 2025 dlens[i] = dlen; 2026 } 2027 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2028 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2029 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2030 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2031 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2032 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2033 } else { 2034 PetscInt ml,nl; 2035 2036 M = *newmat; 2037 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2038 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2039 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2040 /* 2041 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2042 rather than the slower MatSetValues().
2043 */ 2044 M->was_assembled = PETSC_TRUE; 2045 M->assembled = PETSC_FALSE; 2046 } 2047 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2048 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2049 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2050 ii = aij->i; 2051 jj = aij->j; 2052 aa = aij->a; 2053 for (i=0; i<m; i++) { 2054 row = rstart/bs + i; 2055 nz = ii[i+1] - ii[i]; 2056 cwork = jj; jj += nz; 2057 vwork = aa; aa += nz*bs*bs; 2058 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 2061 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 *newmat = M; 2064 2065 /* save submatrix used in processor for next request */ 2066 if (call == MAT_INITIAL_MATRIX) { 2067 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2068 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2074 { 2075 MPI_Comm comm,pcomm; 2076 PetscInt clocal_size,nrows; 2077 const PetscInt *rows; 2078 PetscMPIInt size; 2079 IS crowp,lcolp; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2084 /* make a collective version of 'rowp' */ 2085 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2086 if (pcomm==comm) { 2087 crowp = rowp; 2088 } else { 2089 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2090 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2091 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2092 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2093 } 2094 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2095 /* make a local version of 'colp' */ 2096 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2097 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2098 if (size==1) { 2099 lcolp = colp; 2100 } else { 2101 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2102 } 2103 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2104 /* now we just get the submatrix */ 2105 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2106 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2107 /* clean up */ 2108 if (pcomm!=comm) { 2109 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2110 } 2111 if (size>1) { 2112 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2113 } 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2118 { 2119 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2120 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2121 2122 PetscFunctionBegin; 2123 if (nghosts) *nghosts = B->nbs; 2124 if (ghosts) *ghosts = baij->garray; 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2129 { 2130 Mat B; 2131 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2132 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2133 Mat_SeqAIJ *b; 2134 PetscErrorCode ierr; 2135 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2136 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2137 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2138 2139 PetscFunctionBegin; 2140 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2141 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2142 2143 /* ---------------------------------------------------------------- 2144 Tell every processor the number of nonzeros per row 2145 */ 2146 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2147 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2148 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2149 } 2150 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2151 displs = recvcounts + size; 2152 for (i=0; i<size; i++) { 2153 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2154 displs[i] = A->rmap->range[i]/bs; 2155 } 2156 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2157 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2158 #else 2159 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2160 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 #endif 2162 /* --------------------------------------------------------------- 2163 Create the sequential matrix of the same type as the local block diagonal 2164 */ 2165 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2166 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2167 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2168 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2169 b = (Mat_SeqAIJ*)B->data; 2170 2171 /*-------------------------------------------------------------------- 2172 Copy my part of matrix column indices over 2173 */ 2174 sendcount = ad->nz + bd->nz; 2175 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2176 a_jsendbuf = ad->j; 2177 b_jsendbuf = bd->j; 2178 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2179 cnt = 0; 2180 for (i=0; i<n; i++) { 2181 2182 /* put in lower diagonal portion */ 2183 m = bd->i[i+1] - bd->i[i]; 2184 while (m > 0) { 2185 /* is it above diagonal (in bd (compressed) numbering) */ 2186 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2187 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2188 m--; 2189 } 2190 2191 /* put in diagonal portion */ 2192 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2193 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2194 } 2195 2196 /* put in upper diagonal portion */ 2197 while (m-- > 0) { 2198 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2199 } 2200 } 2201 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2202 2203 /*-------------------------------------------------------------------- 2204 Gather all column indices to all processors 2205 */ 2206 for (i=0; i<size; i++) { 2207 recvcounts[i] = 0; 2208 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2209 recvcounts[i] += lens[j]; 2210 } 2211 } 2212 displs[0] = 0; 2213 for (i=1; i<size; i++) { 2214 displs[i] = displs[i-1] + recvcounts[i-1]; 2215 } 2216 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2217 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2218 #else 2219 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2220 #endif 2221 /*-------------------------------------------------------------------- 2222 Assemble the matrix into useable form (note numerical values not yet set) 2223 */ 
2224 /* set the b->ilen (length of each row) values */ 2225 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2226 /* set the b->i indices */ 2227 b->i[0] = 0; 2228 for (i=1; i<=A->rmap->N/bs; i++) { 2229 b->i[i] = b->i[i-1] + lens[i-1]; 2230 } 2231 ierr = PetscFree(lens);CHKERRQ(ierr); 2232 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2233 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2235 2236 if (A->symmetric) { 2237 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2238 } else if (A->hermitian) { 2239 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2240 } else if (A->structurally_symmetric) { 2241 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2242 } 2243 *newmat = B; 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2248 { 2249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2250 PetscErrorCode ierr; 2251 Vec bb1 = 0; 2252 2253 PetscFunctionBegin; 2254 if (flag == SOR_APPLY_UPPER) { 2255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2260 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2261 } 2262 2263 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2264 if (flag & SOR_ZERO_INITIAL_GUESS) { 2265 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2266 its--; 2267 } 2268 2269 while (its--) { 2270 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2271 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 2273 /* update rhs: bb1 = bb - B*x */ 2274 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2275 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2276 2277 /* local sweep */ 2278 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2279 } 2280 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2281 if (flag & SOR_ZERO_INITIAL_GUESS) { 2282 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2283 its--; 2284 } 2285 while (its--) { 2286 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 2289 /* update rhs: bb1 = bb - B*x */ 2290 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2291 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2292 2293 /* local sweep */ 2294 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2295 } 2296 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2297 if (flag & SOR_ZERO_INITIAL_GUESS) { 2298 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2299 its--; 2300 } 2301 while (its--) { 2302 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2303 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2304 2305 /* update rhs: bb1 = bb - B*x */ 2306 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2307 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2308 2309 /* local sweep */ 2310 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2311 } 2312 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2313 2314 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2319 { 2320 PetscErrorCode ierr; 2321 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2322 PetscInt N,i,*garray = aij->garray; 2323 PetscInt ib,jb,bs = A->rmap->bs; 2324 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2325 MatScalar *a_val = a_aij->a; 2326 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2327 MatScalar *b_val = b_aij->a; 2328 PetscReal *work; 2329 2330 PetscFunctionBegin; 2331 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2332 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2333 if (type == NORM_2) { 2334 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2335 for (jb=0; jb<bs; jb++) { 2336 for (ib=0; ib<bs; ib++) { 2337 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2338 a_val++; 2339 } 2340 } 2341 } 2342 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2343 for (jb=0; jb<bs; jb++) { 2344 for (ib=0; ib<bs; ib++) { 2345 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2346 b_val++; 2347 } 2348 } 2349 } 2350 } else if (type == NORM_1) { 2351 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2352 for (jb=0; jb<bs; jb++) { 2353 for (ib=0; ib<bs; ib++) { 2354 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2355 a_val++; 2356 } 2357 } 2358 } 2359 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2360 for (jb=0; jb<bs; jb++) { 2361 for (ib=0; ib<bs; ib++) { 2362 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2363 b_val++; 2364 } 2365 } 2366 } 2367 } else if (type == NORM_INFINITY) { 2368 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2369 for (jb=0; jb<bs; jb++) { 2370 for (ib=0; ib<bs; ib++) { 2371 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2372 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2373 a_val++; 2374 } 2375 } 2376 } 2377 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2378 for (jb=0; jb<bs; jb++) { 2379 for (ib=0; ib<bs; ib++) { 2380 int col = garray[b_aij->j[i]] * bs + jb; 2381 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2382 b_val++; 2383 } 2384 } 2385 } 2386 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2387 if (type == NORM_INFINITY) { 2388 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2389 } else { 2390 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(work);CHKERRQ(ierr); 2393 if (type == NORM_2) { 2394 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2400 { 2401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2402 PetscErrorCode ierr; 2403 2404 PetscFunctionBegin; 2405 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2406 A->factorerrortype = a->A->factorerrortype; 2407 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2408 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 PetscErrorCode MatShift_MPIBAIJ(Mat 
Y,PetscScalar a) 2413 { 2414 PetscErrorCode ierr; 2415 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2416 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2417 2418 PetscFunctionBegin; 2419 if (!Y->preallocated) { 2420 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2421 } else if (!aij->nz) { 2422 PetscInt nonew = aij->nonew; 2423 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2424 aij->nonew = nonew; 2425 } 2426 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2431 { 2432 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2437 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2438 if (d) { 2439 PetscInt rstart; 2440 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2441 *d += rstart/A->rmap->bs; 2442 2443 } 2444 PetscFunctionReturn(0); 2445 } 2446 2447 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2448 { 2449 PetscFunctionBegin; 2450 *a = ((Mat_MPIBAIJ*)A->data)->A; 2451 PetscFunctionReturn(0); 2452 } 2453 2454 /* -------------------------------------------------------------------*/ 2455 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2456 MatGetRow_MPIBAIJ, 2457 MatRestoreRow_MPIBAIJ, 2458 MatMult_MPIBAIJ, 2459 /* 4*/ MatMultAdd_MPIBAIJ, 2460 MatMultTranspose_MPIBAIJ, 2461 MatMultTransposeAdd_MPIBAIJ, 2462 0, 2463 0, 2464 0, 2465 /*10*/ 0, 2466 0, 2467 0, 2468 MatSOR_MPIBAIJ, 2469 MatTranspose_MPIBAIJ, 2470 /*15*/ MatGetInfo_MPIBAIJ, 2471 MatEqual_MPIBAIJ, 2472 MatGetDiagonal_MPIBAIJ, 2473 MatDiagonalScale_MPIBAIJ, 2474 MatNorm_MPIBAIJ, 2475 /*20*/ MatAssemblyBegin_MPIBAIJ, 2476 MatAssemblyEnd_MPIBAIJ, 2477 MatSetOption_MPIBAIJ, 2478 MatZeroEntries_MPIBAIJ, 2479 /*24*/ MatZeroRows_MPIBAIJ, 2480 0, 2481 0, 2482 0, 2483 0, 2484 /*29*/ MatSetUp_MPIBAIJ, 2485 0, 2486 0, 2487 MatGetDiagonalBlock_MPIBAIJ, 2488 0, 2489 /*34*/ MatDuplicate_MPIBAIJ, 2490 0, 2491 0, 2492 0, 2493 0, 2494 /*39*/ MatAXPY_MPIBAIJ, 2495 MatCreateSubMatrices_MPIBAIJ, 2496 MatIncreaseOverlap_MPIBAIJ, 2497 MatGetValues_MPIBAIJ, 2498 MatCopy_MPIBAIJ, 2499 /*44*/ 0, 2500 MatScale_MPIBAIJ, 2501 MatShift_MPIBAIJ, 2502 0, 2503 MatZeroRowsColumns_MPIBAIJ, 2504 /*49*/ 0, 2505 0, 2506 0, 2507 0, 2508 0, 2509 /*54*/ MatFDColoringCreate_MPIXAIJ, 2510 0, 2511 MatSetUnfactored_MPIBAIJ, 2512 MatPermute_MPIBAIJ, 2513 MatSetValuesBlocked_MPIBAIJ, 2514 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2515 MatDestroy_MPIBAIJ, 2516 MatView_MPIBAIJ, 2517 0, 2518 0, 2519 /*64*/ 0, 2520 0, 2521 0, 2522 0, 2523 0, 2524 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2525 0, 2526 0, 2527 0, 2528 0, 2529 /*74*/ 0, 2530 MatFDColoringApply_BAIJ, 2531 0, 2532 0, 2533 0, 2534 /*79*/ 0, 2535 0, 2536 0, 2537 0, 2538 MatLoad_MPIBAIJ, 2539 /*84*/ 0, 2540 0, 2541 0, 2542 0, 2543 0, 2544 /*89*/ 0, 2545 0, 2546 0, 2547 0, 2548 0, 2549 /*94*/ 0, 2550 0, 2551 0, 2552 0, 2553 0, 2554 /*99*/ 0, 2555 0, 2556 0, 2557 0, 2558 0, 2559 /*104*/0, 2560 MatRealPart_MPIBAIJ, 2561 MatImaginaryPart_MPIBAIJ, 2562 0, 2563 0, 2564 /*109*/0, 2565 0, 2566 0, 2567 0, 2568 MatMissingDiagonal_MPIBAIJ, 2569 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2570 0, 2571 MatGetGhosts_MPIBAIJ, 2572 0, 2573 0, 2574 /*119*/0, 2575 0, 2576 0, 2577 0, 2578 MatGetMultiProcBlock_MPIBAIJ, 2579 /*124*/0, 2580 MatGetColumnNorms_MPIBAIJ, 2581 
MatInvertBlockDiagonal_MPIBAIJ, 2582 0, 2583 0, 2584 /*129*/ 0, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*134*/ 0, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*139*/ MatSetBlockSizes_Default, 2595 0, 2596 0, 2597 MatFDColoringSetUp_MPIXAIJ, 2598 0, 2599 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2600 }; 2601 2602 2603 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2604 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2605 2606 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2607 { 2608 PetscInt m,rstart,cstart,cend; 2609 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2610 const PetscInt *JJ =0; 2611 PetscScalar *values=0; 2612 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2613 PetscErrorCode ierr; 2614 PetscBool nooffprocentries; 2615 2616 PetscFunctionBegin; 2617 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2618 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2619 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2620 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2621 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2622 m = B->rmap->n/bs; 2623 rstart = B->rmap->rstart/bs; 2624 cstart = B->cmap->rstart/bs; 2625 cend = B->cmap->rend/bs; 2626 2627 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2628 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2629 for (i=0; i<m; i++) { 2630 nz = ii[i+1] - ii[i]; 2631 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2632 nz_max = PetscMax(nz_max,nz); 2633 dlen = 0; 2634 olen = 0; 2635 JJ = jj + ii[i]; 2636 for (j=0; j<nz; j++) { 2637 if (*JJ < cstart || *JJ >= cend) olen++; 2638 else dlen++; 2639 JJ++; 2640 } 2641 d_nnz[i] = dlen; 2642 o_nnz[i] = olen; 2643 } 2644 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2645 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2646 2647 values = (PetscScalar*)V; 2648 if (!values) { 2649 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2650 } 2651 for (i=0; i<m; i++) { 2652 PetscInt row = i + rstart; 2653 PetscInt ncols = ii[i+1] - ii[i]; 2654 const PetscInt *icols = jj + ii[i]; 2655 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2656 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2657 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2658 } else { /* block ordering does not match so we can only insert one block at a time. */ 2659 PetscInt j; 2660 for (j=0; j<ncols; j++) { 2661 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2662 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2663 } 2664 } 2665 } 2666 2667 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2668 nooffprocentries = B->nooffprocentries; 2669 B->nooffprocentries = PETSC_TRUE; 2670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 B->nooffprocentries = nooffprocentries; 2673 2674 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 /*@C 2679 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2680 2681 Collective 2682 2683 Input Parameters: 2684 + B - the matrix 2685 . bs - the block size 2686 . i - the indices into j for the start of each local row (starts with zero) 2687 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2688 - v - optional values in the matrix 2689 2690 Level: advanced 2691 2692 Notes: 2693 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2694 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2695 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2696 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2697 block column and the second index is over columns within a block. 2698 2699 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2700 2701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2702 @*/ 2703 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2704 { 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2709 PetscValidType(B,1); 2710 PetscValidLogicalCollectiveInt(B,bs,2); 2711 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714 2715 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2716 { 2717 Mat_MPIBAIJ *b; 2718 PetscErrorCode ierr; 2719 PetscInt i; 2720 PetscMPIInt size; 2721 2722 PetscFunctionBegin; 2723 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2724 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2725 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2726 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2727 2728 if (d_nnz) { 2729 for (i=0; i<B->rmap->n/bs; i++) { 2730 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2731 } 2732 } 2733 if (o_nnz) { 2734 for (i=0; i<B->rmap->n/bs; i++) { 2735 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2736 } 2737 } 2738 2739 b = (Mat_MPIBAIJ*)B->data; 2740 b->bs2 = bs*bs; 2741 b->mbs = B->rmap->n/bs; 2742 b->nbs = B->cmap->n/bs; 2743 
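/* the fields set below keep the parallel layout in units of bs x bs blocks: global block dimensions (Mbs, Nbs), the block-row ownership range of every rank (rangebs), and the first/last block row and block column owned locally */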
b->Mbs = B->rmap->N/bs; 2744 b->Nbs = B->cmap->N/bs; 2745 2746 for (i=0; i<=b->size; i++) { 2747 b->rangebs[i] = B->rmap->range[i]/bs; 2748 } 2749 b->rstartbs = B->rmap->rstart/bs; 2750 b->rendbs = B->rmap->rend/bs; 2751 b->cstartbs = B->cmap->rstart/bs; 2752 b->cendbs = B->cmap->rend/bs; 2753 2754 #if defined(PETSC_USE_CTABLE) 2755 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2756 #else 2757 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2758 #endif 2759 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2760 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2761 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2762 2763 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2764 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2765 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2766 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2767 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2768 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2770 2771 if (!B->preallocated) { 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2774 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2776 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2777 } 2778 2779 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2780 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2781 B->preallocated = PETSC_TRUE; 2782 B->was_assembled = PETSC_FALSE; 2783 B->assembled = PETSC_FALSE; 2784 PetscFunctionReturn(0); 2785 } 2786 2787 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2788 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2789 2790 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2791 { 2792 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2793 PetscErrorCode ierr; 2794 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2795 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2796 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2797 2798 PetscFunctionBegin; 2799 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2800 ii[0] = 0; 2801 for (i=0; i<M; i++) { 2802 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2803 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2804 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2805 /* remove one from count of matrix has diagonal */ 2806 for (j=id[i]; j<id[i+1]; j++) { 2807 if (jd[j] == i) {ii[i+1]--;break;} 2808 } 2809 } 2810 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2811 cnt = 0; 2812 for (i=0; i<M; i++) { 2813 for (j=io[i]; j<io[i+1]; j++) { 2814 if (garray[jo[j]] > rstart) break; 2815 jj[cnt++] = garray[jo[j]]; 2816 } 2817 for (k=id[i]; k<id[i+1]; k++) { 2818 if (jd[k] != i) { 2819 jj[cnt++] = rstart + jd[k]; 2820 } 2821 } 2822 for (; j<io[i+1]; j++) { 2823 jj[cnt++] = garray[jo[j]]; 2824 } 2825 } 2826 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2827 
PetscFunctionReturn(0); 2828 } 2829 2830 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2831 2832 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2833 2834 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2835 { 2836 PetscErrorCode ierr; 2837 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2838 Mat B; 2839 Mat_MPIAIJ *b; 2840 2841 PetscFunctionBegin; 2842 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2843 2844 if (reuse == MAT_REUSE_MATRIX) { 2845 B = *newmat; 2846 } else { 2847 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2848 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2849 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2850 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2851 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2852 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2853 } 2854 b = (Mat_MPIAIJ*) B->data; 2855 2856 if (reuse == MAT_REUSE_MATRIX) { 2857 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2858 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2859 } else { 2860 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2861 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2862 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2863 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2864 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 } 2868 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 2871 if (reuse == MAT_INPLACE_MATRIX) { 2872 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2873 } else { 2874 *newmat = B; 2875 } 2876 PetscFunctionReturn(0); 2877 } 2878 2879 /*MC 2880 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2881 2882 Options Database Keys: 2883 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2884 . -mat_block_size <bs> - set the blocksize used to store the matrix 2885 - -mat_use_hash_table <fact> 2886 2887 Level: beginner 2888 2889 Notes: 2890 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no 2891 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2892 2893 .seealso: MatCreateBAIJ 2894 M*/ 2895 2896 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2897 2898 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2899 { 2900 Mat_MPIBAIJ *b; 2901 PetscErrorCode ierr; 2902 PetscBool flg = PETSC_FALSE; 2903 2904 PetscFunctionBegin; 2905 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2906 B->data = (void*)b; 2907 2908 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2909 B->assembled = PETSC_FALSE; 2910 2911 B->insertmode = NOT_SET_VALUES; 2912 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2913 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2914 2915 /* build local table of row and column ownerships */ 2916 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2917 2918 /* build cache for off array entries formed */ 2919 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2920 2921 b->donotstash = PETSC_FALSE; 2922 b->colmap = NULL; 2923 b->garray = NULL; 2924 b->roworiented = PETSC_TRUE; 2925 2926 /* stuff used in block assembly */ 2927 b->barray = 0; 2928 2929 /* stuff used for matrix vector multiply */ 2930 b->lvec = 0; 2931 b->Mvctx = 0; 2932 2933 /* stuff for MatGetRow() */ 2934 b->rowindices = 0; 2935 b->rowvalues = 0; 2936 b->getrowactive = PETSC_FALSE; 2937 2938 /* hash table stuff */ 2939 b->ht = 0; 2940 b->hd = 0; 2941 b->ht_size = 0; 2942 b->ht_flag = PETSC_FALSE; 2943 b->ht_fact = 0; 2944 b->ht_total_ct = 0; 2945 b->ht_insert_ct = 0; 2946 2947 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2948 b->ijonly = PETSC_FALSE; 2949 2950 2951 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2952 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2954 #if defined(PETSC_HAVE_HYPRE) 2955 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2956 #endif 2957 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2958 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2959 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2960 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2961 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2962 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2963 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 2964 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2965 2966 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2967 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing
matrix","MatSetOption",&flg);CHKERRQ(ierr); 2968 if (flg) { 2969 PetscReal fact = 1.39; 2970 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2971 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2972 if (fact <= 1.0) fact = 1.39; 2973 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2974 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2975 } 2976 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 /*MC 2981 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2982 2983 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2984 and MATMPIBAIJ otherwise. 2985 2986 Options Database Keys: 2987 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2988 2989 Level: beginner 2990 2991 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2992 M*/ 2993 2994 /*@C 2995 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2996 (block compressed row). For good matrix assembly performance 2997 the user should preallocate the matrix storage by setting the parameters 2998 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2999 performance can be increased by more than a factor of 50. 3000 3001 Collective on Mat 3002 3003 Input Parameters: 3004 + B - the matrix 3005 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3006 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3007 . d_nz - number of block nonzeros per block row in diagonal portion of local 3008 submatrix (same for all local rows) 3009 . d_nnz - array containing the number of block nonzeros in the various block rows 3010 in the diagonal portion of the local submatrix (possibly different for each block 3011 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3012 set it even if it is zero. 3013 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3014 submatrix (same for all local rows). 3015 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3016 off-diagonal portion of the local submatrix (possibly different for 3017 each block row) or NULL. 3018 3019 If the *_nnz parameter is given then the *_nz parameter is ignored 3020 3021 Options Database Keys: 3022 + -mat_block_size - size of the blocks to use 3023 - -mat_use_hash_table <fact> 3024 3025 Notes: 3026 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3027 then it must be used on all processors that share the object for that argument. 3028 3029 Storage Information: 3030 For a square global matrix we define each processor's diagonal portion 3031 to be its local rows and the corresponding columns (a square submatrix); 3032 each processor's off-diagonal portion encompasses the remainder of the 3033 local matrix (a rectangular submatrix). 3034 3035 The user can specify preallocated storage for the diagonal part of 3036 the local submatrix with either d_nz or d_nnz (not both). Set 3037 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3038 memory allocation.
Likewise, specify preallocated storage for the 3039 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3040 3041 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3042 the figure below we depict these three local rows and all columns (0-11). 3043 3044 .vb 3045 0 1 2 3 4 5 6 7 8 9 10 11 3046 -------------------------- 3047 row 3 |o o o d d d o o o o o o 3048 row 4 |o o o d d d o o o o o o 3049 row 5 |o o o d d d o o o o o o 3050 -------------------------- 3051 .ve 3052 3053 Thus, any entries in the d locations are stored in the d (diagonal) 3054 submatrix, and any entries in the o locations are stored in the 3055 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3056 stored simply in the MATSEQBAIJ format for compressed row storage. 3057 3058 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3059 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3060 In general, for PDE problems in which most nonzeros are near the diagonal, 3061 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3062 or you will get TERRIBLE performance; see the users' manual chapter on 3063 matrices. 3064 3065 You can call MatGetInfo() to get information on how effective the preallocation was; 3066 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3067 You can also run with the option -info and look for messages with the string 3068 malloc in them to see if additional memory allocation was needed. 3069 3070 Level: intermediate 3071 3072 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3073 @*/ 3074 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3075 { 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3080 PetscValidType(B,1); 3081 PetscValidLogicalCollectiveInt(B,bs,2); 3082 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 /*@C 3087 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3088 (block compressed row). For good matrix assembly performance 3089 the user should preallocate the matrix storage by setting the parameters 3090 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3091 performance can be increased by more than a factor of 50. 3092 3093 Collective 3094 3095 Input Parameters: 3096 + comm - MPI communicator 3097 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3098 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3099 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3100 This value should be the same as the local size used in creating the 3101 y vector for the matrix-vector product y = Ax. 3102 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3103 This value should be the same as the local size used in creating the 3104 x vector for the matrix-vector product y = Ax. 3105 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3106 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
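
   Example usage:
   A minimal sketch of the MatCreate()/MatSetType()/MatMPIBAIJSetPreallocation() sequence that
   this routine wraps, with m, n, M, N as described above; the block size of 2 and the
   per-block-row counts of 3 and 1 below are illustrative assumptions only:

.vb
   Mat A;

   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATBAIJ);
   MatSetFromOptions(A);
   MatMPIBAIJSetPreallocation(A,2,3,NULL,1,NULL);
   MatSeqBAIJSetPreallocation(A,2,3,NULL);
.ve

   Each preallocation call above takes effect only when the matrix has the matching type, so
   both may be called regardless of the communicator size.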
3176 3177 Level: intermediate 3178 3179 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3180 @*/ 3181 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3182 { 3183 PetscErrorCode ierr; 3184 PetscMPIInt size; 3185 3186 PetscFunctionBegin; 3187 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3188 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3189 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3190 if (size > 1) { 3191 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3192 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3193 } else { 3194 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3195 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3196 } 3197 PetscFunctionReturn(0); 3198 } 3199 3200 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3201 { 3202 Mat mat; 3203 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3204 PetscErrorCode ierr; 3205 PetscInt len=0; 3206 3207 PetscFunctionBegin; 3208 *newmat = 0; 3209 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3210 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3211 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3212 3213 mat->factortype = matin->factortype; 3214 mat->preallocated = PETSC_TRUE; 3215 mat->assembled = PETSC_TRUE; 3216 mat->insertmode = NOT_SET_VALUES; 3217 3218 a = (Mat_MPIBAIJ*)mat->data; 3219 mat->rmap->bs = matin->rmap->bs; 3220 a->bs2 = oldmat->bs2; 3221 a->mbs = oldmat->mbs; 3222 a->nbs = oldmat->nbs; 3223 a->Mbs = oldmat->Mbs; 3224 a->Nbs = oldmat->Nbs; 3225 3226 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3227 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3228 3229 a->size = oldmat->size; 3230 a->rank = oldmat->rank; 3231 a->donotstash = oldmat->donotstash; 3232 a->roworiented = oldmat->roworiented; 3233 a->rowindices = 0; 3234 a->rowvalues = 0; 3235 a->getrowactive = PETSC_FALSE; 3236 a->barray = 0; 3237 a->rstartbs = oldmat->rstartbs; 3238 a->rendbs = oldmat->rendbs; 3239 a->cstartbs = oldmat->cstartbs; 3240 a->cendbs = oldmat->cendbs; 3241 3242 /* hash table stuff */ 3243 a->ht = 0; 3244 a->hd = 0; 3245 a->ht_size = 0; 3246 a->ht_flag = oldmat->ht_flag; 3247 a->ht_fact = oldmat->ht_fact; 3248 a->ht_total_ct = 0; 3249 a->ht_insert_ct = 0; 3250 3251 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3252 if (oldmat->colmap) { 3253 #if defined(PETSC_USE_CTABLE) 3254 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3255 #else 3256 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3257 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3258 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3259 #endif 3260 } else a->colmap = 0; 3261 3262 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3263 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3264 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3265 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3266 } else a->garray = 0; 3267 3268 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3269 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3270 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3271 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3272 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3273 3274 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3275 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3276 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3277 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3278 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3279 *newmat = mat; 3280 PetscFunctionReturn(0); 3281 } 3282 3283 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3284 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3285 { 3286 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3287 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3288 PetscScalar *matvals; 3289 PetscErrorCode ierr; 3290 3291 PetscFunctionBegin; 3292 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3293 3294 /* read in matrix header */ 3295 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3296 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3297 M = header[1]; N = header[2]; nz = header[3]; 3298 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3299 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3300 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3301 3302 /* set block sizes from the viewer's .info file */ 3303 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3304 /* set local sizes if not set already */ 3305 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3306 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3307 /* set global sizes if not set already */ 3308 if (mat->rmap->N < 0) mat->rmap->N = M; 3309 if (mat->cmap->N < 0) mat->cmap->N = N; 3310 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3311 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3312 3313 /* check if the matrix sizes are correct */ 3314 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3315 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3316 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3317 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3318 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL); 3319 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce); 3320 mbs = m/bs; nbs = n/bs; 3321 3322 /* read in row lengths and build row indices */ 3323 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3324 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3325 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3326 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3327 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3328 3329 /* read in column indices and 
matrix values */ 3330 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3331 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3332 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3333 3334 { /* preallocate matrix storage */ 3335 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3336 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3337 PetscBool sbaij,done; 3338 PetscInt *d_nnz,*o_nnz; 3339 3340 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3341 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3342 ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3343 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3344 for (i=0; i<mbs; i++) { 3345 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3346 ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3347 for (k=0; k<bs; k++) { 3348 PetscInt row = bs*i + k; 3349 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3350 PetscInt col = colidxs[j]; 3351 if (!sbaij || col >= row) { 3352 if (col >= cs && col < ce) { 3353 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3354 } else { 3355 ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3356 if (done) o_nnz[i]++; 3357 } 3358 } 3359 } 3360 } 3361 } 3362 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3363 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3364 ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3365 ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3366 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3367 } 3368 3369 /* store matrix values */ 3370 for (i=0; i<m; i++) { 3371 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3372 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3373 } 3374 3375 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3376 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3377 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3378 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3379 PetscFunctionReturn(0); 3380 } 3381 3382 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3383 { 3384 PetscErrorCode ierr; 3385 PetscBool isbinary; 3386 3387 PetscFunctionBegin; 3388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3389 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3390 ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3391 PetscFunctionReturn(0); 3392 } 3393 3394 /*@ 3395 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3396 3397 Input Parameters: 3398 + mat - the matrix 3399 - fact - factor 3400 3401 Not Collective, each process can use a different factor 3402 3403 Level: advanced 3404 3405 Notes: 3406 This can also be set by the command line option: -mat_use_hash_table <fact> 3407 3408 .seealso: MatSetOption() 3409 @*/ 3410 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3411 { 3412 PetscErrorCode ierr; 3413 3414 PetscFunctionBegin; 3415 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3420 { 3421 Mat_MPIBAIJ *baij; 3422 3423 PetscFunctionBegin; 3424 baij = (Mat_MPIBAIJ*)mat->data; 3425 baij->ht_fact = fact; 3426 PetscFunctionReturn(0); 3427 } 3428 3429 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3430 { 3431 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3432 PetscBool flg; 3433 PetscErrorCode ierr; 3434 3435 PetscFunctionBegin; 3436 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 3437 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3438 if (Ad) *Ad = a->A; 3439 if (Ao) *Ao = a->B; 3440 if (colmap) *colmap = a->garray; 3441 PetscFunctionReturn(0); 3442 } 3443 3444 /* 3445 Special version for direct calls from Fortran (to eliminate two function call overheads 3446 */ 3447 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3448 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3449 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3450 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3451 #endif 3452 3453 /*@C 3454 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3455 3456 Collective on Mat 3457 3458 Input Parameters: 3459 + mat - the matrix 3460 . min - number of input rows 3461 . im - input rows 3462 . nin - number of input columns 3463 . in - input columns 3464 . v - numerical values input 3465 - addvin - INSERT_VALUES or ADD_VALUES 3466 3467 Notes: 3468 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
3469 3470 Level: advanced 3471 3472 .seealso: MatSetValuesBlocked() 3473 @*/ 3474 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3475 { 3476 /* convert input arguments to C version */ 3477 Mat mat = *matin; 3478 PetscInt m = *min, n = *nin; 3479 InsertMode addv = *addvin; 3480 3481 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3482 const MatScalar *value; 3483 MatScalar *barray = baij->barray; 3484 PetscBool roworiented = baij->roworiented; 3485 PetscErrorCode ierr; 3486 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3487 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3488 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3489 3490 PetscFunctionBegin; 3491 /* tasks normally handled by MatSetValuesBlocked() */ 3492 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3493 #if defined(PETSC_USE_DEBUG) 3494 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3495 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3496 #endif 3497 if (mat->assembled) { 3498 mat->was_assembled = PETSC_TRUE; 3499 mat->assembled = PETSC_FALSE; 3500 } 3501 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3502 3503 3504 if (!barray) { 3505 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3506 baij->barray = barray; 3507 } 3508 3509 if (roworiented) stepval = (n-1)*bs; 3510 else stepval = (m-1)*bs; 3511 3512 for (i=0; i<m; i++) { 3513 if (im[i] < 0) continue; 3514 #if defined(PETSC_USE_DEBUG) 3515 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3516 #endif 3517 if (im[i] >= rstart && im[i] < rend) { 3518 row = im[i] - rstart; 3519 for (j=0; j<n; j++) { 3520 /* If NumCol = 1 then a copy is not required */ 3521 if ((roworiented) && (n == 1)) { 3522 barray = (MatScalar*)v + i*bs2; 3523 } else if ((!roworiented) && (m == 1)) { 3524 barray = (MatScalar*)v + j*bs2; 3525 } else { /* Here a copy is required */ 3526 if (roworiented) { 3527 value = v + i*(stepval+bs)*bs + j*bs; 3528 } else { 3529 value = v + j*(stepval+bs)*bs + i*bs; 3530 } 3531 for (ii=0; ii<bs; ii++,value+=stepval) { 3532 for (jj=0; jj<bs; jj++) { 3533 *barray++ = *value++; 3534 } 3535 } 3536 barray -=bs2; 3537 } 3538 3539 if (in[j] >= cstart && in[j] < cend) { 3540 col = in[j] - cstart; 3541 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3542 } else if (in[j] < 0) continue; 3543 #if defined(PETSC_USE_DEBUG) 3544 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3545 #endif 3546 else { 3547 if (mat->was_assembled) { 3548 if (!baij->colmap) { 3549 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3550 } 3551 3552 #if defined(PETSC_USE_DEBUG) 3553 #if defined(PETSC_USE_CTABLE) 3554 { PetscInt data; 3555 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3556 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3557 } 3558 #else 3559 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3560 #endif 3561 #endif 3562 #if defined(PETSC_USE_CTABLE) 3563 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3564 col = (col - 1)/bs; 3565 #else 3566 col = (baij->colmap[in[j]] - 
1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain in standard block
   CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of nonzero blocks in block row row-1 of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
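
   Example usage:
   A small sketch (the concrete sizes and values are illustrative assumptions, not taken from the
   PETSc documentation): with bs = 1 and two MPI processes, the process owning global rows 2 and 3
   of a 4x4 matrix, where row 2 has nonzeros in columns 0 and 2 and row 3 has nonzeros in
   columns 1, 2 and 3, would pass

.vb
   PetscInt    i[] = {0,2,5};
   PetscInt    j[] = {0,2,1,2,3};
   PetscScalar a[] = {1.0,2.0,3.0,4.0,5.0};
   Mat         A;

   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);
.ve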

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr =
MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3703 3704 for (i=0; i<m; i++) { 3705 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3706 Ii = i + rstart; 3707 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3708 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3709 } 3710 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 PetscFunctionReturn(0); 3713 } 3714
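
/*
   Usage sketch (an illustration added here, not taken from the PETSc documentation): the routine
   above is normally reached through the type-independent interface MatCreateMPIMatConcatenateSeqMat(),
   which stacks one sequential matrix per process into a single parallel matrix. Here seqA is
   assumed to be an assembled MATSEQBAIJ matrix created on PETSC_COMM_SELF by each process.

      Mat mpiA;
      ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/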