#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
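     For example, with block size bs, if block column i of B holds global block column
   g = garray[i], then colmap[g] (or the PetscTable entry keyed by g+1) stores i*bs+1,
   with 0 meaning "not present"; a global column can therefore be translated to a local
   offset in B in constant time.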
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool         roworiented  = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ*)mat->data;
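  /* Note: this routine uses the same hash-table lookup as MatSetValues_MPIBAIJ_HT above, except that
     the key is formed directly from the block row and block column (key = row*Nbs + col + 1) and an
     entire bs x bs block of values is copied to the address stored in HD[idx]. */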
  PetscBool         roworiented  = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

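  /* Only rows owned by this process may be queried; columns that fall in the off-diagonal part are
     located through the colmap built by MatCreateColmap_MPIBAIJ_Private(), and entries that are not
     stored locally are returned as 0.0. */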
  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets up the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
       Since the values are stashed column-oriented, set the roworiented flag to column-oriented,
       and after MatSetValues() restore the original flags. */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since
     process 0 does not have the information needed to rebuild them per row from a block row.
     This does require more communication, but still not more than the communication needed for
     the nonzero values. */
  nzmax = nz; /* space the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin()
       but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
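        (a single pass over the block rows of A and B below finds the largest number of nonzero
        blocks in any block row; max*bs2 values and max*bs2 indices are then allocated)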
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
isend[4] += info->mallocs; 1547 1548 if (flag == MAT_LOCAL) { 1549 info->nz_used = isend[0]; 1550 info->nz_allocated = isend[1]; 1551 info->nz_unneeded = isend[2]; 1552 info->memory = isend[3]; 1553 info->mallocs = isend[4]; 1554 } else if (flag == MAT_GLOBAL_MAX) { 1555 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1556 1557 info->nz_used = irecv[0]; 1558 info->nz_allocated = irecv[1]; 1559 info->nz_unneeded = irecv[2]; 1560 info->memory = irecv[3]; 1561 info->mallocs = irecv[4]; 1562 } else if (flag == MAT_GLOBAL_SUM) { 1563 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1564 1565 info->nz_used = irecv[0]; 1566 info->nz_allocated = irecv[1]; 1567 info->nz_unneeded = irecv[2]; 1568 info->memory = irecv[3]; 1569 info->mallocs = irecv[4]; 1570 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1571 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1572 info->fill_ratio_needed = 0; 1573 info->factor_mallocs = 0; 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1580 { 1581 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 switch (op) { 1586 case MAT_NEW_NONZERO_LOCATIONS: 1587 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1588 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1589 case MAT_KEEP_NONZERO_PATTERN: 1590 case MAT_NEW_NONZERO_LOCATION_ERR: 1591 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1592 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1593 break; 1594 case MAT_ROW_ORIENTED: 1595 a->roworiented = flg; 1596 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_NEW_DIAGONALS: 1601 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1602 break; 1603 case MAT_IGNORE_OFF_PROC_ENTRIES: 1604 a->donotstash = flg; 1605 break; 1606 case MAT_USE_HASH_TABLE: 1607 a->ht_flag = flg; 1608 break; 1609 case MAT_SYMMETRIC: 1610 case MAT_STRUCTURALLY_SYMMETRIC: 1611 case MAT_HERMITIAN: 1612 case MAT_SYMMETRY_ETERNAL: 1613 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1614 break; 1615 default: 1616 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1624 { 1625 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1626 Mat_SeqBAIJ *Aloc; 1627 Mat B; 1628 PetscErrorCode ierr; 1629 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1630 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1631 MatScalar *a; 1632 1633 PetscFunctionBegin; 1634 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1635 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1636 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1637 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1638 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1639 /* Do not know preallocation information, but must set block size */ 1640 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1641 } else { 
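    /* reuse the matrix supplied by the caller; the in-place (A == *matout) case is completed with MatHeaderMerge() after assembly below */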
1642 B = *matout; 1643 } 1644 1645 /* copy over the A part */ 1646 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1647 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1648 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1649 1650 for (i=0; i<mbs; i++) { 1651 rvals[0] = bs*(baij->rstartbs + i); 1652 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1653 for (j=ai[i]; j<ai[i+1]; j++) { 1654 col = (baij->cstartbs+aj[j])*bs; 1655 for (k=0; k<bs; k++) { 1656 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1657 1658 col++; a += bs; 1659 } 1660 } 1661 } 1662 /* copy over the B part */ 1663 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1664 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1665 for (i=0; i<mbs; i++) { 1666 rvals[0] = bs*(baij->rstartbs + i); 1667 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1668 for (j=ai[i]; j<ai[i+1]; j++) { 1669 col = baij->garray[aj[j]]*bs; 1670 for (k=0; k<bs; k++) { 1671 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1672 col++; 1673 a += bs; 1674 } 1675 } 1676 } 1677 ierr = PetscFree(rvals);CHKERRQ(ierr); 1678 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 1681 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1682 else { 1683 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1691 { 1692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1693 Mat a = baij->A,b = baij->B; 1694 PetscErrorCode ierr; 1695 PetscInt s1,s2,s3; 1696 1697 PetscFunctionBegin; 1698 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1699 if (rr) { 1700 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1701 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1702 /* Overlap communication with computation. 
*/ 1703 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1704 } 1705 if (ll) { 1706 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1707 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1708 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1709 } 1710 /* scale the diagonal block */ 1711 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1712 1713 if (rr) { 1714 /* Do a scatter end and then right scale the off-diagonal block */ 1715 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1716 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1724 { 1725 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1726 PetscInt *owners = A->rmap->range; 1727 PetscInt n = A->rmap->n; 1728 PetscMPIInt size = l->size; 1729 PetscSF sf; 1730 PetscInt *lrows; 1731 PetscSFNode *rrows; 1732 PetscInt lastidx = -1, r, p = 0, len = 0; 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 /* Create SF where leaves are input rows and roots are owned rows */ 1737 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1738 for (r = 0; r < n; ++r) lrows[r] = -1; 1739 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1740 for (r = 0; r < N; ++r) { 1741 const PetscInt idx = rows[r]; 1742 PetscBool found = PETSC_FALSE; 1743 /* Trick for efficient searching for sorted rows */ 1744 if (lastidx > idx) p = 0; 1745 lastidx = idx; 1746 for (; p < size; ++p) { 1747 if (idx >= owners[p] && idx < owners[p+1]) { 1748 rrows[r].rank = p; 1749 rrows[r].index = rows[r] - owners[p]; 1750 found = PETSC_TRUE; 1751 break; 1752 } 1753 } 1754 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1755 } 1756 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1757 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1758 /* Collect flags for rows to be zeroed */ 1759 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1760 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1761 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1762 /* Compress and put in row numbers */ 1763 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1764 /* fix right hand side if needed */ 1765 if (x && b) { 1766 const PetscScalar *xx; 1767 PetscScalar *bb; 1768 1769 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1770 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1771 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1772 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1773 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1774 } 1775 1776 /* actually zap the local rows */ 1777 /* 1778 Zero the required rows. If the "diagonal block" of the matrix 1779 is square and the user wishes to set the diagonal we use separate 1780 code so that MatSetValues() is not called for each diagonal allocating 1781 new memory, thus calling lots of mallocs and slowing things down. 
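       When the diagonal block is not square and diag is nonzero, the diagonal entries are
       instead inserted one at a time with MatSetValues() and the matrix is reassembled.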
1782 1783 */ 1784 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1785 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1786 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1788 } else if (diag != 0.0) { 1789 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1790 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1791 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1792 for (r = 0; r < len; ++r) { 1793 const PetscInt row = lrows[r] + A->rmap->rstart; 1794 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1795 } 1796 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1798 } else { 1799 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1800 } 1801 ierr = PetscFree(lrows);CHKERRQ(ierr); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1807 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1808 { 1809 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 1812 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1813 PetscInt *lrows,*owners = A->rmap->range; 1814 PetscSFNode *rrows; 1815 PetscSF sf; 1816 const PetscScalar *xx; 1817 PetscScalar *bb,*mask; 1818 Vec xmask,lmask; 1819 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1820 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1821 PetscScalar *aa; 1822 #if defined(PETSC_DEBUG) 1823 PetscBool found = PETSC_FALSE; 1824 #endif 1825 1826 PetscFunctionBegin; 1827 /* Create SF where leaves are input rows and roots are owned rows */ 1828 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1829 for (r = 0; r < n; ++r) lrows[r] = -1; 1830 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1831 for (r = 0; r < N; ++r) { 1832 const PetscInt idx = rows[r]; 1833 PetscBool found = PETSC_FALSE; 1834 /* Trick for efficient searching for sorted rows */ 1835 if (lastidx > idx) p = 0; 1836 lastidx = idx; 1837 for (; p < size; ++p) { 1838 if (idx >= owners[p] && idx < owners[p+1]) { 1839 rrows[r].rank = p; 1840 rrows[r].index = rows[r] - owners[p]; 1841 found = PETSC_TRUE; 1842 break; 1843 } 1844 } 1845 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1846 } 1847 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1848 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1849 /* Collect flags for rows to be zeroed */ 1850 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1851 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1852 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1853 /* Compress and put in row numbers */ 1854 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1855 /* zero diagonal part of matrix */ 1856 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1857 /* handle off diagonal part of matrix */ 1858 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1859 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1860 ierr = 
VecGetArray(xmask,&bb);CHKERRQ(ierr); 1861 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1862 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1863 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1864 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1866 if (x) { 1867 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1870 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1871 } 1872 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1873 /* remove zeroed rows of off diagonal matrix */ 1874 for (i = 0; i < len; ++i) { 1875 row = lrows[i]; 1876 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1877 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1878 for (k = 0; k < count; ++k) { 1879 aa[0] = 0.0; 1880 aa += bs; 1881 } 1882 } 1883 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1884 for (i = 0; i < l->B->rmap->N; ++i) { 1885 row = i/bs; 1886 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1887 for (k = 0; k < bs; ++k) { 1888 col = bs*baij->j[j] + k; 1889 if (PetscAbsScalar(mask[col])) { 1890 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1891 if (b) bb[i] -= aa[0]*xx[col]; 1892 aa[0] = 0.0; 1893 } 1894 } 1895 } 1896 } 1897 if (x) { 1898 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1899 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1900 } 1901 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1902 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1903 ierr = PetscFree(lrows);CHKERRQ(ierr); 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1909 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1910 { 1911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatEqual_MPIBAIJ" 1923 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1924 { 1925 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1926 Mat a,b,c,d; 1927 PetscBool flg; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 a = matA->A; b = matA->B; 1932 c = matB->A; d = matB->B; 1933 1934 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1935 if (flg) { 1936 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1937 } 1938 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatCopy_MPIBAIJ" 1944 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1945 { 1946 PetscErrorCode ierr; 1947 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1948 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1949 1950 PetscFunctionBegin; 1951 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1952 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1953 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1954 } else { 1955 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1956 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1963 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1964 { 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1974 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1975 { 1976 PetscErrorCode ierr; 1977 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1978 PetscBLASInt bnz,one=1; 1979 Mat_SeqBAIJ *x,*y; 1980 1981 PetscFunctionBegin; 1982 if (str == SAME_NONZERO_PATTERN) { 1983 PetscScalar alpha = a; 1984 x = (Mat_SeqBAIJ*)xx->A->data; 1985 y = (Mat_SeqBAIJ*)yy->A->data; 1986 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1987 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1988 x = (Mat_SeqBAIJ*)xx->B->data; 1989 y = (Mat_SeqBAIJ*)yy->B->data; 1990 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1991 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1992 } else { 1993 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2000 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2001 { 2002 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2007 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2013 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2014 { 2015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2020 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2026 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2027 { 2028 PetscErrorCode ierr; 2029 IS iscol_local; 2030 PetscInt csize; 2031 2032 PetscFunctionBegin; 2033 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2034 if (call == MAT_REUSE_MATRIX) { 2035 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2036 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2037 } else { 2038 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2039 } 2040 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2041 if (call == MAT_INITIAL_MATRIX) { 2042 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2043 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2044 } 2045 PetscFunctionReturn(0); 2046 } 2047 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2050 /* 2051 Not great since it makes two copies of the 
submatrix, first an SeqBAIJ 2052 in local and then by concatenating the local matrices the end result. 2053 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2054 */ 2055 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2056 { 2057 PetscErrorCode ierr; 2058 PetscMPIInt rank,size; 2059 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2060 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2061 Mat M,Mreuse; 2062 MatScalar *vwork,*aa; 2063 MPI_Comm comm; 2064 IS isrow_new, iscol_new; 2065 PetscBool idflag,allrows, allcols; 2066 Mat_SeqBAIJ *aij; 2067 2068 PetscFunctionBegin; 2069 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2070 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2071 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2072 /* The compression and expansion should be avoided. Doesn't point 2073 out errors, might change the indices, hence buggey */ 2074 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2075 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2076 2077 /* Check for special case: each processor gets entire matrix columns */ 2078 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2079 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2080 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2081 else allcols = PETSC_FALSE; 2082 2083 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2084 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2085 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2086 else allrows = PETSC_FALSE; 2087 2088 if (call == MAT_REUSE_MATRIX) { 2089 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2090 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2091 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2092 } else { 2093 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2094 } 2095 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2096 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2097 /* 2098 m - number of local rows 2099 n - number of columns (same on all processors) 2100 rstart - first row in new global matrix generated 2101 */ 2102 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2103 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2104 m = m/bs; 2105 n = n/bs; 2106 2107 if (call == MAT_INITIAL_MATRIX) { 2108 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2109 ii = aij->i; 2110 jj = aij->j; 2111 2112 /* 2113 Determine the number of non-zeros in the diagonal and off-diagonal 2114 portions of the matrix in order to do correct preallocation 2115 */ 2116 2117 /* first get start and end of "diagonal" columns */ 2118 if (csize == PETSC_DECIDE) { 2119 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2120 if (mglobal == n*bs) { /* square matrix */ 2121 nlocal = m; 2122 } else { 2123 nlocal = n/size + ((n % size) > rank); 2124 } 2125 } else { 2126 nlocal = csize/bs; 2127 } 2128 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2129 rstart = rend - nlocal; 2130 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2131 2132 /* next, compute all the 
lengths */ 2133 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2134 for (i=0; i<m; i++) { 2135 jend = ii[i+1] - ii[i]; 2136 olen = 0; 2137 dlen = 0; 2138 for (j=0; j<jend; j++) { 2139 if (*jj < rstart || *jj >= rend) olen++; 2140 else dlen++; 2141 jj++; 2142 } 2143 olens[i] = olen; 2144 dlens[i] = dlen; 2145 } 2146 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2147 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2148 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2149 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2150 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2151 } else { 2152 PetscInt ml,nl; 2153 2154 M = *newmat; 2155 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2156 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2157 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2158 /* 2159 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2160 rather than the slower MatSetValues(). 2161 */ 2162 M->was_assembled = PETSC_TRUE; 2163 M->assembled = PETSC_FALSE; 2164 } 2165 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2166 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2167 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2168 ii = aij->i; 2169 jj = aij->j; 2170 aa = aij->a; 2171 for (i=0; i<m; i++) { 2172 row = rstart/bs + i; 2173 nz = ii[i+1] - ii[i]; 2174 cwork = jj; jj += nz; 2175 vwork = aa; aa += nz*bs*bs; 2176 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2177 } 2178 2179 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 *newmat = M; 2182 2183 /* save submatrix used in processor for next request */ 2184 if (call == MAT_INITIAL_MATRIX) { 2185 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2186 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatPermute_MPIBAIJ" 2193 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2194 { 2195 MPI_Comm comm,pcomm; 2196 PetscInt clocal_size,nrows; 2197 const PetscInt *rows; 2198 PetscMPIInt size; 2199 IS crowp,lcolp; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2204 /* make a collective version of 'rowp' */ 2205 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2206 if (pcomm==comm) { 2207 crowp = rowp; 2208 } else { 2209 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2210 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2211 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2212 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2213 } 2214 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2215 /* make a local version of 'colp' */ 2216 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2217 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2218 if (size==1) { 2219 lcolp = colp; 2220 } else { 2221 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2222 } 2223 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2224 /* now we just get the submatrix */ 2225 ierr = MatGetLocalSize(A,PETSC_NULL,&clocal_size);CHKERRQ(ierr); 2226 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2227 /* clean up */ 2228 if (pcomm!=comm) { 2229 
ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2230 } 2231 if (size>1) { 2232 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2233 } 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2239 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2240 { 2241 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2242 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2243 2244 PetscFunctionBegin; 2245 if (nghosts) *nghosts = B->nbs; 2246 if (ghosts) *ghosts = baij->garray; 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2252 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2253 { 2254 Mat B; 2255 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2256 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2257 Mat_SeqAIJ *b; 2258 PetscErrorCode ierr; 2259 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2260 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2261 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2262 2263 PetscFunctionBegin; 2264 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2265 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2266 2267 /* ---------------------------------------------------------------- 2268 Tell every processor the number of nonzeros per row 2269 */ 2270 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2271 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2272 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2273 } 2274 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2275 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2276 displs = recvcounts + size; 2277 for (i=0; i<size; i++) { 2278 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2279 displs[i] = A->rmap->range[i]/bs; 2280 } 2281 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2282 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2283 #else 2284 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2285 #endif 2286 /* --------------------------------------------------------------- 2287 Create the sequential matrix of the same type as the local block diagonal 2288 */ 2289 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2290 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2291 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2292 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2293 b = (Mat_SeqAIJ*)B->data; 2294 2295 /*-------------------------------------------------------------------- 2296 Copy my part of matrix column indices over 2297 */ 2298 sendcount = ad->nz + bd->nz; 2299 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2300 a_jsendbuf = ad->j; 2301 b_jsendbuf = bd->j; 2302 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2303 cnt = 0; 2304 for (i=0; i<n; i++) { 2305 2306 /* put in lower diagonal portion */ 2307 m = bd->i[i+1] - bd->i[i]; 2308 while (m > 0) { 2309 /* is it above diagonal (in bd (compressed) numbering) */ 2310 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2311 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2312 m--; 2313 } 2314 2315 /* put in diagonal portion */ 2316 for (j=ad->i[i]; 
j<ad->i[i+1]; j++) { 2317 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2318 } 2319 2320 /* put in upper diagonal portion */ 2321 while (m-- > 0) { 2322 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2323 } 2324 } 2325 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2326 2327 /*-------------------------------------------------------------------- 2328 Gather all column indices to all processors 2329 */ 2330 for (i=0; i<size; i++) { 2331 recvcounts[i] = 0; 2332 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2333 recvcounts[i] += lens[j]; 2334 } 2335 } 2336 displs[0] = 0; 2337 for (i=1; i<size; i++) { 2338 displs[i] = displs[i-1] + recvcounts[i-1]; 2339 } 2340 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2341 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2342 #else 2343 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2344 #endif 2345 /*-------------------------------------------------------------------- 2346 Assemble the matrix into useable form (note numerical values not yet set) 2347 */ 2348 /* set the b->ilen (length of each row) values */ 2349 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2350 /* set the b->i indices */ 2351 b->i[0] = 0; 2352 for (i=1; i<=A->rmap->N/bs; i++) { 2353 b->i[i] = b->i[i-1] + lens[i-1]; 2354 } 2355 ierr = PetscFree(lens);CHKERRQ(ierr); 2356 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2357 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2359 2360 if (A->symmetric) { 2361 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2362 } else if (A->hermitian) { 2363 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2364 } else if (A->structurally_symmetric) { 2365 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2366 } 2367 *newmat = B; 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatSOR_MPIBAIJ" 2373 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2374 { 2375 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2376 PetscErrorCode ierr; 2377 Vec bb1 = 0; 2378 2379 PetscFunctionBegin; 2380 if (flag == SOR_APPLY_UPPER) { 2381 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2386 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2387 } 2388 2389 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2390 if (flag & SOR_ZERO_INITIAL_GUESS) { 2391 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2392 its--; 2393 } 2394 2395 while (its--) { 2396 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2397 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2398 2399 /* update rhs: bb1 = bb - B*x */ 2400 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2401 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2402 2403 /* local sweep */ 2404 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2405 } 2406 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 2407 if (flag & SOR_ZERO_INITIAL_GUESS) { 2408 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2409 its--; 2410 } 2411 while (its--) { 2412 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2413 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2414 2415 /* update rhs: bb1 = bb - B*x */ 2416 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2417 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2418 2419 /* local sweep */ 2420 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2421 } 2422 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2423 if (flag & SOR_ZERO_INITIAL_GUESS) { 2424 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2425 its--; 2426 } 2427 while (its--) { 2428 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 2431 /* update rhs: bb1 = bb - B*x */ 2432 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2433 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2434 2435 /* local sweep */ 2436 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2437 } 2438 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2439 2440 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2446 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2447 { 2448 PetscErrorCode ierr; 2449 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2450 PetscInt N,i,*garray = aij->garray; 2451 PetscInt ib,jb,bs = A->rmap->bs; 2452 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2453 MatScalar *a_val = a_aij->a; 2454 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2455 MatScalar *b_val = b_aij->a; 2456 PetscReal *work; 2457 2458 PetscFunctionBegin; 2459 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2460 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2461 if (type == NORM_2) { 2462 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2463 for (jb=0; jb<bs; jb++) { 2464 for (ib=0; ib<bs; ib++) { 2465 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2466 a_val++; 2467 } 2468 } 2469 } 2470 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2471 for (jb=0; jb<bs; jb++) { 2472 for (ib=0; ib<bs; ib++) { 2473 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2474 b_val++; 2475 } 2476 } 2477 } 2478 } else if (type == NORM_1) { 2479 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2480 for (jb=0; jb<bs; jb++) { 2481 for (ib=0; ib<bs; ib++) { 2482 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2483 a_val++; 2484 } 2485 } 2486 } 2487 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2488 for (jb=0; jb<bs; jb++) { 2489 for (ib=0; ib<bs; ib++) { 2490 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2491 b_val++; 2492 } 2493 } 2494 } 2495 } else if (type == NORM_INFINITY) { 2496 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2497 for (jb=0; jb<bs; jb++) { 2498 for (ib=0; ib<bs; ib++) { 2499 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2500 work[col] = 
PetscMax(PetscAbsScalar(*a_val), work[col]); 2501 a_val++; 2502 } 2503 } 2504 } 2505 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2506 for (jb=0; jb<bs; jb++) { 2507 for (ib=0; ib<bs; ib++) { 2508 int col = garray[b_aij->j[i]] * bs + jb; 2509 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2510 b_val++; 2511 } 2512 } 2513 } 2514 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2515 if (type == NORM_INFINITY) { 2516 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2517 } else { 2518 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2519 } 2520 ierr = PetscFree(work);CHKERRQ(ierr); 2521 if (type == NORM_2) { 2522 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2523 } 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2529 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2530 { 2531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 2540 /* -------------------------------------------------------------------*/ 2541 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2542 MatGetRow_MPIBAIJ, 2543 MatRestoreRow_MPIBAIJ, 2544 MatMult_MPIBAIJ, 2545 /* 4*/ MatMultAdd_MPIBAIJ, 2546 MatMultTranspose_MPIBAIJ, 2547 MatMultTransposeAdd_MPIBAIJ, 2548 0, 2549 0, 2550 0, 2551 /*10*/ 0, 2552 0, 2553 0, 2554 MatSOR_MPIBAIJ, 2555 MatTranspose_MPIBAIJ, 2556 /*15*/ MatGetInfo_MPIBAIJ, 2557 MatEqual_MPIBAIJ, 2558 MatGetDiagonal_MPIBAIJ, 2559 MatDiagonalScale_MPIBAIJ, 2560 MatNorm_MPIBAIJ, 2561 /*20*/ MatAssemblyBegin_MPIBAIJ, 2562 MatAssemblyEnd_MPIBAIJ, 2563 MatSetOption_MPIBAIJ, 2564 MatZeroEntries_MPIBAIJ, 2565 /*24*/ MatZeroRows_MPIBAIJ, 2566 0, 2567 0, 2568 0, 2569 0, 2570 /*29*/ MatSetUp_MPIBAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*34*/ MatDuplicate_MPIBAIJ, 2576 0, 2577 0, 2578 0, 2579 0, 2580 /*39*/ MatAXPY_MPIBAIJ, 2581 MatGetSubMatrices_MPIBAIJ, 2582 MatIncreaseOverlap_MPIBAIJ, 2583 MatGetValues_MPIBAIJ, 2584 MatCopy_MPIBAIJ, 2585 /*44*/ 0, 2586 MatScale_MPIBAIJ, 2587 0, 2588 0, 2589 MatZeroRowsColumns_MPIBAIJ, 2590 /*49*/ 0, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*54*/ MatFDColoringCreate_MPIXAIJ, 2596 0, 2597 MatSetUnfactored_MPIBAIJ, 2598 MatPermute_MPIBAIJ, 2599 MatSetValuesBlocked_MPIBAIJ, 2600 /*59*/ MatGetSubMatrix_MPIBAIJ, 2601 MatDestroy_MPIBAIJ, 2602 MatView_MPIBAIJ, 2603 0, 2604 0, 2605 /*64*/ 0, 2606 0, 2607 0, 2608 0, 2609 0, 2610 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*74*/ 0, 2616 MatFDColoringApply_BAIJ, 2617 0, 2618 0, 2619 0, 2620 /*79*/ 0, 2621 0, 2622 0, 2623 0, 2624 MatLoad_MPIBAIJ, 2625 /*84*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*89*/ 0, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*94*/ 0, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*99*/ 0, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*104*/0, 2646 MatRealPart_MPIBAIJ, 2647 MatImaginaryPart_MPIBAIJ, 2648 0, 2649 0, 2650 /*109*/0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2656 0, 2657 MatGetGhosts_MPIBAIJ, 2658 0, 2659 0, 2660 /*119*/0, 2661 0, 2662 0, 2663 0, 2664 MatGetMultiProcBlock_MPIBAIJ, 2665 /*124*/0, 2666 MatGetColumnNorms_MPIBAIJ, 2667 MatInvertBlockDiagonal_MPIBAIJ, 2668 0, 2669 0, 2670 /*129*/ 0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 
/*134*/ 0, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*139*/ 0, 2681 0, 2682 0, 2683 MatFDColoringSetUp_MPIXAIJ 2684 }; 2685 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2688 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2689 { 2690 PetscFunctionBegin; 2691 *a = ((Mat_MPIBAIJ*)A->data)->A; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2696 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2700 { 2701 PetscInt m,rstart,cstart,cend; 2702 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2703 const PetscInt *JJ =0; 2704 PetscScalar *values=0; 2705 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2710 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2712 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2713 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2714 m = B->rmap->n/bs; 2715 rstart = B->rmap->rstart/bs; 2716 cstart = B->cmap->rstart/bs; 2717 cend = B->cmap->rend/bs; 2718 2719 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2720 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2721 for (i=0; i<m; i++) { 2722 nz = ii[i+1] - ii[i]; 2723 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2724 nz_max = PetscMax(nz_max,nz); 2725 JJ = jj + ii[i]; 2726 for (j=0; j<nz; j++) { 2727 if (*JJ >= cstart) break; 2728 JJ++; 2729 } 2730 d = 0; 2731 for (; j<nz; j++) { 2732 if (*JJ++ >= cend) break; 2733 d++; 2734 } 2735 d_nnz[i] = d; 2736 o_nnz[i] = nz - d; 2737 } 2738 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2739 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt row = i + rstart; 2748 PetscInt ncols = ii[i+1] - ii[i]; 2749 const PetscInt *icols = jj + ii[i]; 2750 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2751 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2752 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2753 } else { /* block ordering does not match so we can only insert one block at a time. */ 2754 PetscInt j; 2755 for (j=0; j<ncols; j++) { 2756 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2757 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2758 } 2759 } 2760 } 2761 2762 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2763 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2771 /*@C 2772 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2773 (the default parallel PETSc format). 2774 2775 Collective on MPI_Comm 2776 2777 Input Parameters: 2778 + A - the matrix 2779 . bs - the block size 2780 . i - the indices into j for the start of each local row (starts with zero) 2781 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2782 - v - optional values in the matrix 2783 2784 Level: developer 2785 2786 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2787 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2788 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2789 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2790 block column and the second index is over columns within a block. 2791 2792 .keywords: matrix, aij, compressed row, sparse, parallel 2793 2794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2795 @*/ 2796 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2797 { 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2802 PetscValidType(B,1); 2803 PetscValidLogicalCollectiveInt(B,bs,2); 2804 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2810 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2811 { 2812 Mat_MPIBAIJ *b; 2813 PetscErrorCode ierr; 2814 PetscInt i; 2815 2816 PetscFunctionBegin; 2817 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2818 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2822 2823 if (d_nnz) { 2824 for (i=0; i<B->rmap->n/bs; i++) { 2825 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2826 } 2827 } 2828 if (o_nnz) { 2829 for (i=0; i<B->rmap->n/bs; i++) { 2830 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2831 } 2832 } 2833 2834 b = (Mat_MPIBAIJ*)B->data; 2835 b->bs2 = bs*bs; 2836 b->mbs = B->rmap->n/bs; 2837 b->nbs = B->cmap->n/bs; 2838 b->Mbs = B->rmap->N/bs; 2839 b->Nbs 
= B->cmap->N/bs; 2840 2841 for (i=0; i<=b->size; i++) { 2842 b->rangebs[i] = B->rmap->range[i]/bs; 2843 } 2844 b->rstartbs = B->rmap->rstart/bs; 2845 b->rendbs = B->rmap->rend/bs; 2846 b->cstartbs = B->cmap->rstart/bs; 2847 b->cendbs = B->cmap->rend/bs; 2848 2849 if (!B->preallocated) { 2850 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2851 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2852 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2854 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2855 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2856 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2858 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2859 } 2860 2861 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2862 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2863 B->preallocated = PETSC_TRUE; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2868 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2872 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2873 { 2874 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2875 PetscErrorCode ierr; 2876 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2877 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2878 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2879 2880 PetscFunctionBegin; 2881 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2882 ii[0] = 0; 2883 for (i=0; i<M; i++) { 2884 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2885 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2886 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2887 /* remove one from count of matrix has diagonal */ 2888 for (j=id[i]; j<id[i+1]; j++) { 2889 if (jd[j] == i) {ii[i+1]--;break;} 2890 } 2891 } 2892 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2893 cnt = 0; 2894 for (i=0; i<M; i++) { 2895 for (j=io[i]; j<io[i+1]; j++) { 2896 if (garray[jo[j]] > rstart) break; 2897 jj[cnt++] = garray[jo[j]]; 2898 } 2899 for (k=id[i]; k<id[i+1]; k++) { 2900 if (jd[k] != i) { 2901 jj[cnt++] = rstart + jd[k]; 2902 } 2903 } 2904 for (; j<io[i+1]; j++) { 2905 jj[cnt++] = garray[jo[j]]; 2906 } 2907 } 2908 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2909 PetscFunctionReturn(0); 2910 } 2911 2912 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2913 2914 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2918 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2919 { 2920 PetscErrorCode ierr; 2921 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2922 Mat B; 2923 Mat_MPIAIJ *b; 2924 2925 PetscFunctionBegin; 2926 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2927 2928 
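  /* create an MPIAIJ matrix with the same sizes and layout; its sequential diagonal and
     off-diagonal blocks are replaced below by AIJ conversions of this matrix's blocks */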
ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2929 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2930 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2931 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2932 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2933 b = (Mat_MPIAIJ*) B->data; 2934 2935 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2936 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2937 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2938 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2939 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2940 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2941 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2942 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2943 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 if (reuse == MAT_REUSE_MATRIX) { 2945 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2946 } else { 2947 *newmat = B; 2948 } 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #if defined(PETSC_HAVE_MUMPS) 2953 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2954 #endif 2955 2956 /*MC 2957 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2958 2959 Options Database Keys: 2960 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2961 . -mat_block_size <bs> - set the blocksize used to store the matrix 2962 - -mat_use_hash_table <fact> 2963 2964 Level: beginner 2965 2966 .seealso: MatCreateMPIBAIJ 2967 M*/ 2968 2969 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatCreate_MPIBAIJ" 2973 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2974 { 2975 Mat_MPIBAIJ *b; 2976 PetscErrorCode ierr; 2977 PetscBool flg; 2978 2979 PetscFunctionBegin; 2980 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2981 B->data = (void*)b; 2982 2983 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2984 B->assembled = PETSC_FALSE; 2985 2986 B->insertmode = NOT_SET_VALUES; 2987 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2988 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2989 2990 /* build local table of row and column ownerships */ 2991 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 2992 2993 /* build cache for off array entries formed */ 2994 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2995 2996 b->donotstash = PETSC_FALSE; 2997 b->colmap = NULL; 2998 b->garray = NULL; 2999 b->roworiented = PETSC_TRUE; 3000 3001 /* stuff used in block assembly */ 3002 b->barray = 0; 3003 3004 /* stuff used for matrix vector multiply */ 3005 b->lvec = 0; 3006 b->Mvctx = 0; 3007 3008 /* stuff for MatGetRow() */ 3009 b->rowindices = 0; 3010 b->rowvalues = 0; 3011 b->getrowactive = PETSC_FALSE; 3012 3013 /* hash table stuff */ 3014 b->ht = 0; 3015 b->hd = 0; 3016 b->ht_size = 0; 3017 b->ht_flag = PETSC_FALSE; 3018 b->ht_fact = 0; 3019 b->ht_total_ct = 0; 3020 b->ht_insert_ct = 0; 3021 3022 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3023 b->ijonly = PETSC_FALSE; 3024 3025 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3026 ierr = 
PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3027 if (flg) { 3028 PetscReal fact = 1.39; 3029 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3030 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3031 if (fact <= 1.0) fact = 1.39; 3032 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3033 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3034 } 3035 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3036 3037 #if defined(PETSC_HAVE_MUMPS) 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3039 #endif 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3051 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 /*MC 3056 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3057 3058 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3059 and MATMPIBAIJ otherwise. 3060 3061 Options Database Keys: 3062 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3063 3064 Level: beginner 3065 3066 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3067 M*/ 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3071 /*@C 3072 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3073 (block compressed row). For good matrix assembly performance 3074 the user should preallocate the matrix storage by setting the parameters 3075 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3076 performance can be increased by more than a factor of 50. 3077 3078 Collective on Mat 3079 3080 Input Parameters: 3081 + A - the matrix 3082 . bs - size of block 3083 . d_nz - number of block nonzeros per block row in diagonal portion of local 3084 submatrix (same for all local rows) 3085 . 
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory (fact is the hash table density factor)

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
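
   A sample usage (an illustrative sketch; the block size of 2 and the per-block-row counts of 5
   diagonal and 2 off-diagonal block nonzeros are made-up values for a hypothetical problem, and
   comm, m and n are assumed to be an MPI communicator and local sizes that are multiples of bs):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
   /* ... set values with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve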
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory (fact is the hash table density factor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
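
   A sample usage (an illustrative sketch; the block size of 2 and the nonzero-block counts of 5 and 2
   per block row are made-up values, and comm, M and N are assumed to be an MPI communicator and
   global sizes that are multiples of bs):
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve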
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = 1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset = 1,grows,gcols;
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize (for example, M = 10 with bs = 3 is
     padded to 12 rows, so Mbs = 4 and extra_rows = 2)
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = -1; /* unused, but compiler warns anyway */

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the column indices for every other process (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for the other processes (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
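
   For example, on a single process (so that local and global column indices coincide) and with bs = 1,
   the 2 by 3 local matrix
.vb
   1 0 0
   2 0 3
.ve
   could be created as follows (an illustrative sketch; the sizes and numerical values are made up):
.vb
   Mat         A;
   PetscInt    ia[] = {0,1,3};
   PetscInt    ja[] = {0,0,2};
   PetscScalar va[] = {1.0,2.0,3.0};
   ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_SELF,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
.ve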

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}