#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
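
     Illustrative sketch (not part of the original comment; gcol, bs and lcol
   are placeholder names): once colmap exists, a global point column gcol that
   falls outside the diagonal block is translated to a local column of baij->B
   roughly as

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = baij->colmap[gcol/bs] - 1;
   #endif
      if (lcol >= 0) lcol += gcol % bs;      (local column within baij->B)

     The +1/-1 shift lets a stored 0 mean "this block column is not present on
   this process"; MatSetValues_MPIBAIJ() below performs exactly this lookup.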
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 if (v) PetscValidScalarPointer(v,6); 216 for (i=0; i<m; i++) { 217 if (im[i] < 0) continue; 218 #if defined(PETSC_USE_DEBUG) 219 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for (j=0; j<n; j++) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig) { 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined(PETSC_USE_CTABLE) 240 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 241 col = col - 1; 242 #else 243 col = baij->colmap[in[j]/bs] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 246 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 249 B = baij->B; 250 b = (Mat_SeqBAIJ*)(B)->data; 251 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 252 ba =b->a; 253 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 254 else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; 257 else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 264 if (!baij->donotstash) { 265 mat->assembled = PETSC_FALSE; 266 if (roworiented) { 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 268 } else { 269 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 270 } 271 } 272 } 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 282 const PetscScalar *value; 283 MatScalar *barray = baij->barray; 284 PetscBool roworiented = baij->roworiented; 285 PetscErrorCode ierr; 286 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 287 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 288 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 289 290 PetscFunctionBegin; 291 if (!barray) { 292 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 293 baij->barray = barray; 294 } 295 296 if (roworiented) stepval = (n-1)*bs; 297 else stepval = (m-1)*bs; 298 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if ((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend) { 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined(PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined(PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 355 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 358 } else col = in[j]; 359 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 360 } 361 } 362 } else { 363 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 364 if (!baij->donotstash) { 365 if (roworiented) { 366 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig = mat->rmap->rstart;
  PetscInt       rend_orig   = mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
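  /* Added commentary on the HASH() macro used by both _HT routines: it is the
     standard multiplicative hash with the golden-ratio constant 0.6180339887.
     For a key k and table size s it takes the fractional part of k*0.618...
     and scales it to [0,s).  Worked example: key = 7, size = 100 gives
     7*0.6180339887 = 4.3262..., fractional part 0.3262..., slot 32.
     Collisions are handled by the linear-probing loops below, which scan
     forward from h1 and wrap around to 0. */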
459 PetscBool roworiented = baij->roworiented; 460 PetscErrorCode ierr; 461 PetscInt i,j,ii,jj,row,col; 462 PetscInt rstart=baij->rstartbs; 463 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 464 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 465 PetscReal tmp; 466 MatScalar **HD = baij->hd,*baij_a; 467 const PetscScalar *v_t,*value; 468 #if defined(PETSC_USE_DEBUG) 469 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 470 #endif 471 472 PetscFunctionBegin; 473 if (roworiented) stepval = (n-1)*bs; 474 else stepval = (m-1)*bs; 475 476 for (i=0; i<m; i++) { 477 #if defined(PETSC_USE_DEBUG) 478 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 479 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 480 #endif 481 row = im[i]; 482 v_t = v + i*nbs2; 483 if (row >= rstart && row < rend) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 487 /* Look up into the Hash Table */ 488 key = row*Nbs+col+1; 489 h1 = HASH(size,key,tmp); 490 491 idx = h1; 492 #if defined(PETSC_USE_DEBUG) 493 total_ct++; 494 insert_ct++; 495 if (HT[idx] != key) { 496 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == size) { 498 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 500 } 501 } 502 #else 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #endif 511 baij_a = HD[idx]; 512 if (roworiented) { 513 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 514 /* value = v + (i*(stepval+bs)+j)*bs; */ 515 value = v_t; 516 v_t += bs; 517 if (addv == ADD_VALUES) { 518 for (ii=0; ii<bs; ii++,value+=stepval) { 519 for (jj=ii; jj<bs2; jj+=bs) { 520 baij_a[jj] += *value++; 521 } 522 } 523 } else { 524 for (ii=0; ii<bs; ii++,value+=stepval) { 525 for (jj=ii; jj<bs2; jj+=bs) { 526 baij_a[jj] = *value++; 527 } 528 } 529 } 530 } else { 531 value = v + j*(stepval+bs)*bs + i*bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 534 for (jj=0; jj<bs; jj++) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 540 for (jj=0; jj<bs; jj++) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } 546 } 547 } else { 548 if (!baij->donotstash) { 549 if (roworiented) { 550 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 #if defined(PETSC_USE_DEBUG) 558 baij->ht_total_ct = total_ct; 559 baij->ht_insert_ct = insert_ct; 560 #endif 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetValues_MPIBAIJ" 566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 571 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 572 573 
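  /* Caller-side sketch (values are illustrative): MatGetValues() on an
     MPIBAIJ matrix may only request rows owned by the calling process, which
     is what the SETERRQ at the bottom of this routine enforces.

        PetscInt    rows[1] = {5},cols[2] = {5,17};
        PetscScalar vals[2];
        ierr = MatGetValues(mat,1,rows,2,cols,vals);CHKERRQ(ierr);

     Columns outside the diagonal block are translated through colmap/garray
     and return 0.0 when the entry is not stored locally. */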
PetscFunctionBegin; 574 for (i=0; i<m; i++) { 575 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 576 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 577 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 578 row = idxm[i] - bsrstart; 579 for (j=0; j<n; j++) { 580 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 581 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 582 if (idxn[j] >= bscstart && idxn[j] < bscend) { 583 col = idxn[j] - bscstart; 584 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 585 } else { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 591 data--; 592 #else 593 data = baij->colmap[idxn[j]/bs]-1; 594 #endif 595 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 596 else { 597 col = data + idxn[j]%bs; 598 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } 600 } 601 } 602 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatNorm_MPIBAIJ" 609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 612 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 613 PetscErrorCode ierr; 614 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 615 PetscReal sum = 0.0; 616 MatScalar *v; 617 618 PetscFunctionBegin; 619 if (baij->size == 1) { 620 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 v = amat->a; 624 nz = amat->nz*bs2; 625 for (i=0; i<nz; i++) { 626 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 627 } 628 v = bmat->a; 629 nz = bmat->nz*bs2; 630 for (i=0; i<nz; i++) { 631 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 632 } 633 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 *nrm = PetscSqrtReal(*nrm); 635 } else if (type == NORM_1) { /* max column sum */ 636 PetscReal *tmp,*tmp2; 637 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 638 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 639 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 640 v = amat->a; jj = amat->j; 641 for (i=0; i<amat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*(cstart + *jj) + j; /* column index */ 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 v = bmat->a; jj = bmat->j; 651 for (i=0; i<bmat->nz; i++) { 652 for (j=0; j<bs; j++) { 653 col = bs*garray[*jj] + j; 654 for (row=0; row<bs; row++) { 655 tmp[col] += PetscAbsScalar(*v); v++; 656 } 657 } 658 jj++; 659 } 660 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 *nrm = 0.0; 662 for (j=0; j<mat->cmap->N; j++) { 663 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 664 } 665 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 666 } else if (type == NORM_INFINITY) { /* max row sum */ 667 PetscReal *sums; 668 ierr = 
PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)?
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 806 807 /* make sure all processors are either in INSERTMODE or ADDMODE */ 808 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 809 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 817 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 826 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 827 PetscErrorCode ierr; 828 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 829 PetscInt *row,*col; 830 PetscBool r1,r2,r3,other_disassembled; 831 MatScalar *val; 832 InsertMode addv = mat->insertmode; 833 PetscMPIInt n; 834 835 PetscFunctionBegin; 836 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 837 if (!baij->donotstash && !mat->nooffprocentries) { 838 while (1) { 839 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 840 if (!flg) break; 841 842 for (i=0; i<n;) { 843 /* Now identify the consecutive vals belonging to the same row */ 844 for (j=i,rstart=row[j]; j<n; j++) { 845 if (row[j] != rstart) break; 846 } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. 
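 (These entries were contributed through MatSetValuesBlocked() for block rows
       owned by other ranks; as a hedged caller-side picture, rank 1 calling
          MatSetValuesBlocked(mat,1,&row_owned_by_rank0,1,&bcol,blockvals,ADD_VALUES);
       puts the bs*bs block into mat->bstash, MatAssemblyBegin() ships it to
       the owning rank, and the receive loop below applies it there.)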
Since the values are stashed column-oriented,
       clear the roworiented flags, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
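  /* Viewer-side sketch (illustrative; exact option syntax depends on the
     PETSc version): the INFO_DETAIL branch below is what a user reaches with
     something like

        ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
        ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     or a command-line option such as -mat_view ::ascii_info_detail. */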
ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 952 if (iascii) { 953 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 954 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 955 MatInfo info; 956 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 957 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 960 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 961 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 962 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 963 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 964 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 965 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 966 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 967 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 968 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } else if (format == PETSC_VIEWER_ASCII_INFO) { 971 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 974 PetscFunctionReturn(0); 975 } 976 } 977 978 if (isdraw) { 979 PetscDraw draw; 980 PetscBool isnull; 981 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 982 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 983 } 984 985 if (size == 1) { 986 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 987 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 988 } else { 989 /* assemble the entire matrix onto first processor. */ 990 Mat A; 991 Mat_SeqBAIJ *Aloc; 992 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 993 MatScalar *a; 994 995 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 996 /* Perhaps this should be the type of mat? 
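 The gather below builds a temporary matrix with every row placed on rank 0
      purely for viewing, so rank 0's memory use grows with the global matrix
      size; this is a viewing convenience rather than a scalable operation.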
*/ 997 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 998 if (!rank) { 999 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1000 } else { 1001 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1002 } 1003 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1004 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1005 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1006 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1007 1008 /* copy over the A part */ 1009 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1010 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1011 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1012 1013 for (i=0; i<mbs; i++) { 1014 rvals[0] = bs*(baij->rstartbs + i); 1015 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1016 for (j=ai[i]; j<ai[i+1]; j++) { 1017 col = (baij->cstartbs+aj[j])*bs; 1018 for (k=0; k<bs; k++) { 1019 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1020 col++; a += bs; 1021 } 1022 } 1023 } 1024 /* copy over the B part */ 1025 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1026 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1027 for (i=0; i<mbs; i++) { 1028 rvals[0] = bs*(baij->rstartbs + i); 1029 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1030 for (j=ai[i]; j<ai[i+1]; j++) { 1031 col = baij->garray[aj[j]]*bs; 1032 for (k=0; k<bs; k++) { 1033 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1034 col++; a += bs; 1035 } 1036 } 1037 } 1038 ierr = PetscFree(rvals);CHKERRQ(ierr); 1039 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 /* 1042 Everyone has to call to draw the matrix since the graphics waits are 1043 synchronized across all processors that share the PetscDraw object 1044 */ 1045 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1046 if (!rank) { 1047 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1048 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1049 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1050 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1051 } 1052 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1053 ierr = MatDestroy(&A);CHKERRQ(ierr); 1054 } 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1060 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1061 { 1062 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1063 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1064 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1065 PetscErrorCode ierr; 1066 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1067 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1068 int fd; 1069 PetscScalar *column_values; 1070 FILE *file; 1071 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1072 PetscInt message_count,flowcontrolcount; 1073 1074 PetscFunctionBegin; 1075 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1076 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1077 nz = bs2*(A->nz + B->nz); 1078 rlen = mat->rmap->n; 1079 if (!rank) { 1080 header[0] = MAT_FILE_CLASSID; 1081 header[1] = 
mat->rmap->N; 1082 header[2] = mat->cmap->N; 1083 1084 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1085 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1086 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1087 /* get largest number of rows any processor has */ 1088 range = mat->rmap->range; 1089 for (i=1; i<size; i++) { 1090 rlen = PetscMax(rlen,range[i+1] - range[i]); 1091 } 1092 } else { 1093 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1094 } 1095 1096 ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1097 /* compute lengths of each row */ 1098 for (i=0; i<a->mbs; i++) { 1099 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1100 } 1101 /* store the row lengths to the file */ 1102 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1103 if (!rank) { 1104 MPI_Status status; 1105 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1106 rlen = (range[1] - range[0])/bs; 1107 for (i=0; i<rlen; i++) { 1108 for (j=0; j<bs; j++) { 1109 row_lens[i*bs+j] = bs*crow_lens[i]; 1110 } 1111 } 1112 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1113 for (i=1; i<size; i++) { 1114 rlen = (range[i+1] - range[i])/bs; 1115 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1116 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1117 for (k=0; k<rlen; k++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[k*bs+j] = bs*crow_lens[k]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 } 1124 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1125 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1126 } else { 1127 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1128 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1129 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1130 } 1131 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1132 1133 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1134 information needed to make it for each row from a block row. 
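 (Small worked example, added for clarity: with bs = 2, a block row whose
     block columns are {3,7} is written as point columns {6,7,14,15}, and that
     list is repeated once for each of the 2 rows in the block row.)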
This does require more communication but still not more than 1135 the communication needed for the nonzero values */ 1136 nzmax = nz; /* space a largest processor needs */ 1137 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1138 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1139 cnt = 0; 1140 for (i=0; i<a->mbs; i++) { 1141 pcnt = cnt; 1142 for (j=B->i[i]; j<B->i[i+1]; j++) { 1143 if ((col = garray[B->j[j]]) > cstart) break; 1144 for (l=0; l<bs; l++) { 1145 column_indices[cnt++] = bs*col+l; 1146 } 1147 } 1148 for (k=A->i[i]; k<A->i[i+1]; k++) { 1149 for (l=0; l<bs; l++) { 1150 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1151 } 1152 } 1153 for (; j<B->i[i+1]; j++) { 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1156 } 1157 } 1158 len = cnt - pcnt; 1159 for (k=1; k<bs; k++) { 1160 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1161 cnt += len; 1162 } 1163 } 1164 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1165 1166 /* store the columns to the file */ 1167 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1168 if (!rank) { 1169 MPI_Status status; 1170 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1171 for (i=1; i<size; i++) { 1172 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1173 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1174 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1175 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1178 } else { 1179 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1180 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1182 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1183 } 1184 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1185 1186 /* load up the numerical values */ 1187 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1188 cnt = 0; 1189 for (i=0; i<a->mbs; i++) { 1190 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1191 for (j=B->i[i]; j<B->i[i+1]; j++) { 1192 if (garray[B->j[j]] > cstart) break; 1193 for (l=0; l<bs; l++) { 1194 for (ll=0; ll<bs; ll++) { 1195 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1196 } 1197 } 1198 cnt += bs; 1199 } 1200 for (k=A->i[i]; k<A->i[i+1]; k++) { 1201 for (l=0; l<bs; l++) { 1202 for (ll=0; ll<bs; ll++) { 1203 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1204 } 1205 } 1206 cnt += bs; 1207 } 1208 for (; j<B->i[i+1]; j++) { 1209 for (l=0; l<bs; l++) { 1210 for (ll=0; ll<bs; ll++) { 1211 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1212 } 1213 } 1214 cnt += bs; 1215 } 1216 cnt += (bs-1)*rlen; 1217 } 1218 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1219 1220 /* store the column values to the file */ 1221 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1222 if (!rank) { 1223 MPI_Status status; 1224 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1225 for (i=1; i<size; i++) { 1226 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1227 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1228 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1229 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1230 } 1231 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1232 } else { 1233 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1234 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1235 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1237 } 1238 ierr = PetscFree(column_values);CHKERRQ(ierr); 1239 1240 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1241 if (file) { 1242 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1243 } 1244 PetscFunctionReturn(0); 1245 } 1246 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatView_MPIBAIJ" 1249 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1250 { 1251 PetscErrorCode ierr; 1252 PetscBool iascii,isdraw,issocket,isbinary; 1253 1254 PetscFunctionBegin; 1255 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1256 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1257 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1258 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1259 if (iascii || isdraw || issocket) { 1260 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1261 } else if (isbinary) { 1262 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1269 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1270 { 1271 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1277 #endif 1278 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1279 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1280 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1281 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1282 #if defined(PETSC_USE_CTABLE) 1283 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1284 #else 1285 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1286 #endif 1287 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1288 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1289 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1290 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1291 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1292 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1293 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1294 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1295 1296 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1298 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin()
but we need to call this nop */ 1370 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1377 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1378 { 1379 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1380 PetscErrorCode ierr; 1381 1382 PetscFunctionBegin; 1383 /* do nondiagonal part */ 1384 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1385 /* send it on its way */ 1386 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* do local part */ 1388 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1389 /* receive remote parts: note this assumes the values are not actually */ 1390 /* inserted in yy until the next line, which is true for my implementation*/ 1391 /* but is not perhaps always true. */ 1392 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 /* 1397 This only works correctly for square matrices where the subblock A->A is the 1398 diagonal block 1399 */ 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1402 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1403 { 1404 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1405 PetscErrorCode ierr; 1406 1407 PetscFunctionBegin; 1408 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1409 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatScale_MPIBAIJ" 1415 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1416 { 1417 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1422 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1428 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1429 { 1430 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1431 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1432 PetscErrorCode ierr; 1433 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1434 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1435 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1436 1437 PetscFunctionBegin; 1438 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1439 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1440 mat->getrowactive = PETSC_TRUE; 1441 1442 if (!mat->rowvalues && (idx || v)) { 1443 /* 1444 allocate enough space to hold information from the longest row. 
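
     A hedged caller-side sketch of the interface this routine serves (names
     are illustrative): each local row is returned in global column ordering,
     and every MatGetRow() must be paired with MatRestoreRow():

        PetscInt          ncols;
        const PetscInt    *cols;
        const PetscScalar *vals;
        ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
        ...
        ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);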
1445 */ 1446 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1447 PetscInt max = 1,mbs = mat->mbs,tmp; 1448 for (i=0; i<mbs; i++) { 1449 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1450 if (max < tmp) max = tmp; 1451 } 1452 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1453 } 1454 lrow = row - brstart; 1455 1456 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1457 if (!v) {pvA = 0; pvB = 0;} 1458 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1459 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1460 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1461 nztot = nzA + nzB; 1462 1463 cmap = mat->garray; 1464 if (v || idx) { 1465 if (nztot) { 1466 /* Sort by increasing column numbers, assuming A and B already sorted */ 1467 PetscInt imark = -1; 1468 if (v) { 1469 *v = v_p = mat->rowvalues; 1470 for (i=0; i<nzB; i++) { 1471 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1472 else break; 1473 } 1474 imark = i; 1475 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1476 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1477 } 1478 if (idx) { 1479 *idx = idx_p = mat->rowindices; 1480 if (imark > -1) { 1481 for (i=0; i<imark; i++) { 1482 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1483 } 1484 } else { 1485 for (i=0; i<nzB; i++) { 1486 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1487 else break; 1488 } 1489 imark = i; 1490 } 1491 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1492 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1493 } 1494 } else { 1495 if (idx) *idx = 0; 1496 if (v) *v = 0; 1497 } 1498 } 1499 *nz = nztot; 1500 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1507 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1508 { 1509 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1510 1511 PetscFunctionBegin; 1512 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1513 baij->getrowactive = PETSC_FALSE; 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1519 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1520 { 1521 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1526 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1532 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1533 { 1534 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1535 Mat A = a->A,B = a->B; 1536 PetscErrorCode ierr; 1537 PetscReal isend[5],irecv[5]; 1538 1539 PetscFunctionBegin; 1540 info->block_size = (PetscReal)matin->rmap->bs; 1541 1542 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1543 1544 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1545 isend[3] = info->memory; isend[4] = info->mallocs; 1546 1547 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1548 1549 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1550 isend[3] += info->memory; 
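  /* Usage sketch (added, illustrative): callers usually ask for global totals,
     e.g.

        MatInfo info;
        ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
        nz   = (PetscInt)info.nz_used;

     which takes the MAT_GLOBAL_SUM branch below and reduces the five isend[]
     entries over the matrix communicator. */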
isend[4] += info->mallocs; 1551 1552 if (flag == MAT_LOCAL) { 1553 info->nz_used = isend[0]; 1554 info->nz_allocated = isend[1]; 1555 info->nz_unneeded = isend[2]; 1556 info->memory = isend[3]; 1557 info->mallocs = isend[4]; 1558 } else if (flag == MAT_GLOBAL_MAX) { 1559 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1560 1561 info->nz_used = irecv[0]; 1562 info->nz_allocated = irecv[1]; 1563 info->nz_unneeded = irecv[2]; 1564 info->memory = irecv[3]; 1565 info->mallocs = irecv[4]; 1566 } else if (flag == MAT_GLOBAL_SUM) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1568 1569 info->nz_used = irecv[0]; 1570 info->nz_allocated = irecv[1]; 1571 info->nz_unneeded = irecv[2]; 1572 info->memory = irecv[3]; 1573 info->mallocs = irecv[4]; 1574 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1575 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1576 info->fill_ratio_needed = 0; 1577 info->factor_mallocs = 0; 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1583 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1584 { 1585 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1586 PetscErrorCode ierr; 1587 1588 PetscFunctionBegin; 1589 switch (op) { 1590 case MAT_NEW_NONZERO_LOCATIONS: 1591 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1592 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1593 case MAT_KEEP_NONZERO_PATTERN: 1594 case MAT_NEW_NONZERO_LOCATION_ERR: 1595 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1596 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1597 break; 1598 case MAT_ROW_ORIENTED: 1599 a->roworiented = flg; 1600 1601 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1602 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1603 break; 1604 case MAT_NEW_DIAGONALS: 1605 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1606 break; 1607 case MAT_IGNORE_OFF_PROC_ENTRIES: 1608 a->donotstash = flg; 1609 break; 1610 case MAT_USE_HASH_TABLE: 1611 a->ht_flag = flg; 1612 break; 1613 case MAT_SYMMETRIC: 1614 case MAT_STRUCTURALLY_SYMMETRIC: 1615 case MAT_HERMITIAN: 1616 case MAT_SYMMETRY_ETERNAL: 1617 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1618 break; 1619 default: 1620 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1627 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1628 { 1629 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1630 Mat_SeqBAIJ *Aloc; 1631 Mat B; 1632 PetscErrorCode ierr; 1633 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1634 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1635 MatScalar *a; 1636 1637 PetscFunctionBegin; 1638 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1639 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1640 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1641 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1642 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1643 /* Do not know preallocation information, but must set block size */ 1644 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1645 } else { 
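/* MAT_REUSE_MATRIX with a distinct output matrix: reuse the previously created transpose and overwrite its values */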
1646 B = *matout; 1647 } 1648 1649 /* copy over the A part */ 1650 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1651 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1652 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1653 1654 for (i=0; i<mbs; i++) { 1655 rvals[0] = bs*(baij->rstartbs + i); 1656 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1657 for (j=ai[i]; j<ai[i+1]; j++) { 1658 col = (baij->cstartbs+aj[j])*bs; 1659 for (k=0; k<bs; k++) { 1660 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1661 1662 col++; a += bs; 1663 } 1664 } 1665 } 1666 /* copy over the B part */ 1667 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1668 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1669 for (i=0; i<mbs; i++) { 1670 rvals[0] = bs*(baij->rstartbs + i); 1671 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1672 for (j=ai[i]; j<ai[i+1]; j++) { 1673 col = baij->garray[aj[j]]*bs; 1674 for (k=0; k<bs; k++) { 1675 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1676 col++; 1677 a += bs; 1678 } 1679 } 1680 } 1681 ierr = PetscFree(rvals);CHKERRQ(ierr); 1682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 1685 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1686 else { 1687 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1688 } 1689 PetscFunctionReturn(0); 1690 } 1691 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1694 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1695 { 1696 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1697 Mat a = baij->A,b = baij->B; 1698 PetscErrorCode ierr; 1699 PetscInt s1,s2,s3; 1700 1701 PetscFunctionBegin; 1702 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1703 if (rr) { 1704 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1705 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1706 /* Overlap communication with computation. 
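 The forward scatter of rr into baij->lvec is started here and completed only after the diagonal block has been scaled below.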
*/ 1707 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 } 1709 if (ll) { 1710 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1711 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1712 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1713 } 1714 /* scale the diagonal block */ 1715 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1716 1717 if (rr) { 1718 /* Do a scatter end and then right scale the off-diagonal block */ 1719 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1720 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1727 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1728 { 1729 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1730 PetscInt *owners = A->rmap->range; 1731 PetscInt n = A->rmap->n; 1732 PetscMPIInt size = l->size; 1733 PetscSF sf; 1734 PetscInt *lrows; 1735 PetscSFNode *rrows; 1736 PetscInt lastidx = -1, r, p = 0, len = 0; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 /* Create SF where leaves are input rows and roots are owned rows */ 1741 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1742 for (r = 0; r < n; ++r) lrows[r] = -1; 1743 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1744 for (r = 0; r < N; ++r) { 1745 const PetscInt idx = rows[r]; 1746 PetscBool found = PETSC_FALSE; 1747 /* Trick for efficient searching for sorted rows */ 1748 if (lastidx > idx) p = 0; 1749 lastidx = idx; 1750 for (; p < size; ++p) { 1751 if (idx >= owners[p] && idx < owners[p+1]) { 1752 rrows[r].rank = p; 1753 rrows[r].index = rows[r] - owners[p]; 1754 found = PETSC_TRUE; 1755 break; 1756 } 1757 } 1758 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1759 } 1760 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1761 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1762 /* Collect flags for rows to be zeroed */ 1763 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1764 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1766 /* Compress and put in row numbers */ 1767 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1768 /* fix right hand side if needed */ 1769 if (x && b) { 1770 const PetscScalar *xx; 1771 PetscScalar *bb; 1772 1773 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1774 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1775 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1776 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1777 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1778 } 1779 1780 /* actually zap the local rows */ 1781 /* 1782 Zero the required rows. If the "diagonal block" of the matrix 1783 is square and the user wishes to set the diagonal we use separate 1784 code so that MatSetValues() is not called once per diagonal entry, 1785 which would allocate new memory each time and slow the assembly down.
1786 1787 */ 1788 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1789 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1790 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1791 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1792 } else if (diag != 0.0) { 1793 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1794 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1795 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1796 for (r = 0; r < len; ++r) { 1797 const PetscInt row = lrows[r] + A->rmap->rstart; 1798 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1799 } 1800 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1801 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1802 } else { 1803 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1804 } 1805 ierr = PetscFree(lrows);CHKERRQ(ierr); 1806 { 1807 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1808 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1815 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1816 { 1817 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1818 PetscErrorCode ierr; 1819 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 1820 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1821 PetscInt *lrows,*owners = A->rmap->range; 1822 PetscSFNode *rrows; 1823 PetscSF sf; 1824 const PetscScalar *xx; 1825 PetscScalar *bb,*mask; 1826 Vec xmask,lmask; 1827 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1828 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1829 PetscScalar *aa; 1830 #if defined(PETSC_DEBUG) 1831 PetscBool found = PETSC_FALSE; 1832 #endif 1833 1834 PetscFunctionBegin; 1835 /* Create SF where leaves are input rows and roots are owned rows */ 1836 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1837 for (r = 0; r < n; ++r) lrows[r] = -1; 1838 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1839 for (r = 0; r < N; ++r) { 1840 const PetscInt idx = rows[r]; 1841 PetscBool found = PETSC_FALSE; 1842 /* Trick for efficient searching for sorted rows */ 1843 if (lastidx > idx) p = 0; 1844 lastidx = idx; 1845 for (; p < size; ++p) { 1846 if (idx >= owners[p] && idx < owners[p+1]) { 1847 rrows[r].rank = p; 1848 rrows[r].index = rows[r] - owners[p]; 1849 found = PETSC_TRUE; 1850 break; 1851 } 1852 } 1853 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1854 } 1855 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1856 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1857 /* Collect flags for rows to be zeroed */ 1858 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1859 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1860 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1861 /* Compress and put in row numbers */ 1862 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1863 /* zero diagonal part of matrix */ 1864 ierr = 
MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1865 /* handle off diagonal part of matrix */ 1866 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1867 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1868 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1869 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1870 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1871 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1872 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1874 if (x) { 1875 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1876 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1877 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1878 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1879 } 1880 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1881 /* remove zeroed rows of off diagonal matrix */ 1882 for (i = 0; i < len; ++i) { 1883 row = lrows[i]; 1884 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1885 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1886 for (k = 0; k < count; ++k) { 1887 aa[0] = 0.0; 1888 aa += bs; 1889 } 1890 } 1891 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1892 for (i = 0; i < l->B->rmap->N; ++i) { 1893 row = i/bs; 1894 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1895 for (k = 0; k < bs; ++k) { 1896 col = bs*baij->j[j] + k; 1897 if (PetscAbsScalar(mask[col])) { 1898 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1899 if (b) bb[i] -= aa[0]*xx[col]; 1900 aa[0] = 0.0; 1901 } 1902 } 1903 } 1904 } 1905 if (x) { 1906 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1907 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1908 } 1909 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1910 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1911 ierr = PetscFree(lrows);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1917 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1918 { 1919 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1920 PetscErrorCode ierr; 1921 1922 PetscFunctionBegin; 1923 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatEqual_MPIBAIJ" 1931 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1932 { 1933 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1934 Mat a,b,c,d; 1935 PetscBool flg; 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 a = matA->A; b = matA->B; 1940 c = matB->A; d = matB->B; 1941 1942 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1943 if (flg) { 1944 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1945 } 1946 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatCopy_MPIBAIJ" 1952 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1953 { 1954 PetscErrorCode ierr; 1955 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1956 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1957 1958 PetscFunctionBegin; 1959 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
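 In that case fall back to the generic MatCopy_Basic(); otherwise copy the diagonal and off-diagonal blocks directly.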
*/ 1960 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1961 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1962 } else { 1963 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1964 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1965 } 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1971 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1972 { 1973 PetscErrorCode ierr; 1974 1975 PetscFunctionBegin; 1976 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1982 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1983 { 1984 PetscErrorCode ierr; 1985 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1986 PetscBLASInt bnz,one=1; 1987 Mat_SeqBAIJ *x,*y; 1988 1989 PetscFunctionBegin; 1990 if (str == SAME_NONZERO_PATTERN) { 1991 PetscScalar alpha = a; 1992 x = (Mat_SeqBAIJ*)xx->A->data; 1993 y = (Mat_SeqBAIJ*)yy->A->data; 1994 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1995 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1996 x = (Mat_SeqBAIJ*)xx->B->data; 1997 y = (Mat_SeqBAIJ*)yy->B->data; 1998 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1999 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2000 } else { 2001 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2002 } 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2008 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2009 { 2010 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2011 PetscErrorCode ierr; 2012 2013 PetscFunctionBegin; 2014 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2015 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2021 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2022 { 2023 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2028 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2034 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2035 { 2036 PetscErrorCode ierr; 2037 IS iscol_local; 2038 PetscInt csize; 2039 2040 PetscFunctionBegin; 2041 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2042 if (call == MAT_REUSE_MATRIX) { 2043 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2044 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2045 } else { 2046 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2047 } 2048 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2049 if (call == MAT_INITIAL_MATRIX) { 2050 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2051 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2058 /* 2059 Not great since it makes two copies of the 
submatrix: first a SeqBAIJ matrix on each process, 2059 and then the final result obtained by concatenating the local matrices. 2060 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2061 */ 2062 2063 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2064 { 2065 PetscErrorCode ierr; 2066 PetscMPIInt rank,size; 2067 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2068 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2069 Mat M,Mreuse; 2070 MatScalar *vwork,*aa; 2071 MPI_Comm comm; 2072 IS isrow_new, iscol_new; 2073 PetscBool idflag,allrows, allcols; 2074 Mat_SeqBAIJ *aij; 2075 2076 PetscFunctionBegin; 2077 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2078 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2080 /* The compression and expansion should be avoided. It does not catch 2081 errors and might change the indices, hence it is buggy */ 2082 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2083 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2084 2085 /* Check for special case: each processor gets entire matrix columns */ 2086 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2087 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2088 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2089 else allcols = PETSC_FALSE; 2090 2091 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2092 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2093 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2094 else allrows = PETSC_FALSE; 2095 2096 if (call == MAT_REUSE_MATRIX) { 2097 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2098 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2099 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2100 } else { 2101 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2102 } 2103 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2104 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2105 /* 2106 m - number of local rows 2107 n - number of columns (same on all processors) 2108 rstart - first row in new global matrix generated 2109 */ 2110 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2111 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2112 m = m/bs; 2113 n = n/bs; 2114 2115 if (call == MAT_INITIAL_MATRIX) { 2116 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2117 ii = aij->i; 2118 jj = aij->j; 2119 2120 /* 2121 Determine the number of non-zeros in the diagonal and off-diagonal 2122 portions of the matrix in order to do correct preallocation 2123 */ 2124 2125 /* first get start and end of "diagonal" columns */ 2126 if (csize == PETSC_DECIDE) { 2127 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2128 if (mglobal == n*bs) { /* square matrix */ 2129 nlocal = m; 2130 } else { 2131 nlocal = n/size + ((n % size) > rank); 2132 } 2133 } else { 2134 nlocal = csize/bs; 2135 } 2136 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2137 rstart = rend - nlocal; 2138 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2139 2140 /* next, compute all the
lengths */ 2141 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2142 for (i=0; i<m; i++) { 2143 jend = ii[i+1] - ii[i]; 2144 olen = 0; 2145 dlen = 0; 2146 for (j=0; j<jend; j++) { 2147 if (*jj < rstart || *jj >= rend) olen++; 2148 else dlen++; 2149 jj++; 2150 } 2151 olens[i] = olen; 2152 dlens[i] = dlen; 2153 } 2154 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2155 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2156 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2157 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2158 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2159 } else { 2160 PetscInt ml,nl; 2161 2162 M = *newmat; 2163 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2164 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2165 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2166 /* 2167 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2168 rather than the slower MatSetValues(). 2169 */ 2170 M->was_assembled = PETSC_TRUE; 2171 M->assembled = PETSC_FALSE; 2172 } 2173 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2174 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2175 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2176 ii = aij->i; 2177 jj = aij->j; 2178 aa = aij->a; 2179 for (i=0; i<m; i++) { 2180 row = rstart/bs + i; 2181 nz = ii[i+1] - ii[i]; 2182 cwork = jj; jj += nz; 2183 vwork = aa; aa += nz*bs*bs; 2184 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2185 } 2186 2187 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2188 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2189 *newmat = M; 2190 2191 /* save submatrix used in processor for next request */ 2192 if (call == MAT_INITIAL_MATRIX) { 2193 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2194 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatPermute_MPIBAIJ" 2201 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2202 { 2203 MPI_Comm comm,pcomm; 2204 PetscInt clocal_size,nrows; 2205 const PetscInt *rows; 2206 PetscMPIInt size; 2207 IS crowp,lcolp; 2208 PetscErrorCode ierr; 2209 2210 PetscFunctionBegin; 2211 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2212 /* make a collective version of 'rowp' */ 2213 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2214 if (pcomm==comm) { 2215 crowp = rowp; 2216 } else { 2217 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2218 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2219 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2220 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2221 } 2222 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2223 /* make a local version of 'colp' */ 2224 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2225 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2226 if (size==1) { 2227 lcolp = colp; 2228 } else { 2229 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2230 } 2231 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2232 /* now we just get the submatrix */ 2233 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2234 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2235 /* clean up */ 2236 if (pcomm!=comm) { 2237 ierr =
ISDestroy(&crowp);CHKERRQ(ierr); 2238 } 2239 if (size>1) { 2240 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2241 } 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2247 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2248 { 2249 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2250 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2251 2252 PetscFunctionBegin; 2253 if (nghosts) *nghosts = B->nbs; 2254 if (ghosts) *ghosts = baij->garray; 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2260 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2261 { 2262 Mat B; 2263 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2264 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2265 Mat_SeqAIJ *b; 2266 PetscErrorCode ierr; 2267 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2268 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2269 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2270 2271 PetscFunctionBegin; 2272 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2273 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2274 2275 /* ---------------------------------------------------------------- 2276 Tell every processor the number of nonzeros per row 2277 */ 2278 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2279 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2280 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2281 } 2282 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2283 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2284 displs = recvcounts + size; 2285 for (i=0; i<size; i++) { 2286 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2287 displs[i] = A->rmap->range[i]/bs; 2288 } 2289 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2290 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2291 #else 2292 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2293 #endif 2294 /* --------------------------------------------------------------- 2295 Create the sequential matrix of the same type as the local block diagonal 2296 */ 2297 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2298 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2299 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2300 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2301 b = (Mat_SeqAIJ*)B->data; 2302 2303 /*-------------------------------------------------------------------- 2304 Copy my part of matrix column indices over 2305 */ 2306 sendcount = ad->nz + bd->nz; 2307 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2308 a_jsendbuf = ad->j; 2309 b_jsendbuf = bd->j; 2310 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2311 cnt = 0; 2312 for (i=0; i<n; i++) { 2313 2314 /* put in lower diagonal portion */ 2315 m = bd->i[i+1] - bd->i[i]; 2316 while (m > 0) { 2317 /* is it above diagonal (in bd (compressed) numbering) */ 2318 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2319 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2320 m--; 2321 } 2322 2323 /* put in diagonal portion */ 2324 for (j=ad->i[i]; 
j<ad->i[i+1]; j++) { 2325 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2326 } 2327 2328 /* put in upper diagonal portion */ 2329 while (m-- > 0) { 2330 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2331 } 2332 } 2333 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2334 2335 /*-------------------------------------------------------------------- 2336 Gather all column indices to all processors 2337 */ 2338 for (i=0; i<size; i++) { 2339 recvcounts[i] = 0; 2340 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2341 recvcounts[i] += lens[j]; 2342 } 2343 } 2344 displs[0] = 0; 2345 for (i=1; i<size; i++) { 2346 displs[i] = displs[i-1] + recvcounts[i-1]; 2347 } 2348 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2349 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2350 #else 2351 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2352 #endif 2353 /*-------------------------------------------------------------------- 2354 Assemble the matrix into useable form (note numerical values not yet set) 2355 */ 2356 /* set the b->ilen (length of each row) values */ 2357 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2358 /* set the b->i indices */ 2359 b->i[0] = 0; 2360 for (i=1; i<=A->rmap->N/bs; i++) { 2361 b->i[i] = b->i[i-1] + lens[i-1]; 2362 } 2363 ierr = PetscFree(lens);CHKERRQ(ierr); 2364 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2365 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2366 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2367 2368 if (A->symmetric) { 2369 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2370 } else if (A->hermitian) { 2371 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2372 } else if (A->structurally_symmetric) { 2373 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2374 } 2375 *newmat = B; 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatSOR_MPIBAIJ" 2381 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2382 { 2383 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2384 PetscErrorCode ierr; 2385 Vec bb1 = 0; 2386 2387 PetscFunctionBegin; 2388 if (flag == SOR_APPLY_UPPER) { 2389 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2394 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2395 } 2396 2397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2398 if (flag & SOR_ZERO_INITIAL_GUESS) { 2399 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2400 its--; 2401 } 2402 2403 while (its--) { 2404 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2405 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2406 2407 /* update rhs: bb1 = bb - B*x */ 2408 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2409 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2410 2411 /* local sweep */ 2412 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2413 } 2414 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 2415 if (flag & SOR_ZERO_INITIAL_GUESS) { 2416 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2417 its--; 2418 } 2419 while (its--) { 2420 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2421 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2422 2423 /* update rhs: bb1 = bb - B*x */ 2424 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2425 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2426 2427 /* local sweep */ 2428 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2429 } 2430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2431 if (flag & SOR_ZERO_INITIAL_GUESS) { 2432 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2433 its--; 2434 } 2435 while (its--) { 2436 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2438 2439 /* update rhs: bb1 = bb - B*x */ 2440 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2441 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2442 2443 /* local sweep */ 2444 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2445 } 2446 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2447 2448 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2454 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2455 { 2456 PetscErrorCode ierr; 2457 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2458 PetscInt N,i,*garray = aij->garray; 2459 PetscInt ib,jb,bs = A->rmap->bs; 2460 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2461 MatScalar *a_val = a_aij->a; 2462 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2463 MatScalar *b_val = b_aij->a; 2464 PetscReal *work; 2465 2466 PetscFunctionBegin; 2467 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2468 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2469 if (type == NORM_2) { 2470 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2471 for (jb=0; jb<bs; jb++) { 2472 for (ib=0; ib<bs; ib++) { 2473 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2474 a_val++; 2475 } 2476 } 2477 } 2478 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2479 for (jb=0; jb<bs; jb++) { 2480 for (ib=0; ib<bs; ib++) { 2481 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2482 b_val++; 2483 } 2484 } 2485 } 2486 } else if (type == NORM_1) { 2487 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2488 for (jb=0; jb<bs; jb++) { 2489 for (ib=0; ib<bs; ib++) { 2490 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2491 a_val++; 2492 } 2493 } 2494 } 2495 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2496 for (jb=0; jb<bs; jb++) { 2497 for (ib=0; ib<bs; ib++) { 2498 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2499 b_val++; 2500 } 2501 } 2502 } 2503 } else if (type == NORM_INFINITY) { 2504 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2505 for (jb=0; jb<bs; jb++) { 2506 for (ib=0; ib<bs; ib++) { 2507 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2508 work[col] = 
PetscMax(PetscAbsScalar(*a_val), work[col]); 2509 a_val++; 2510 } 2511 } 2512 } 2513 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2514 for (jb=0; jb<bs; jb++) { 2515 for (ib=0; ib<bs; ib++) { 2516 int col = garray[b_aij->j[i]] * bs + jb; 2517 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2518 b_val++; 2519 } 2520 } 2521 } 2522 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2523 if (type == NORM_INFINITY) { 2524 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2525 } else { 2526 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2527 } 2528 ierr = PetscFree(work);CHKERRQ(ierr); 2529 if (type == NORM_2) { 2530 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2537 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2538 { 2539 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 2548 /* -------------------------------------------------------------------*/ 2549 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2550 MatGetRow_MPIBAIJ, 2551 MatRestoreRow_MPIBAIJ, 2552 MatMult_MPIBAIJ, 2553 /* 4*/ MatMultAdd_MPIBAIJ, 2554 MatMultTranspose_MPIBAIJ, 2555 MatMultTransposeAdd_MPIBAIJ, 2556 0, 2557 0, 2558 0, 2559 /*10*/ 0, 2560 0, 2561 0, 2562 MatSOR_MPIBAIJ, 2563 MatTranspose_MPIBAIJ, 2564 /*15*/ MatGetInfo_MPIBAIJ, 2565 MatEqual_MPIBAIJ, 2566 MatGetDiagonal_MPIBAIJ, 2567 MatDiagonalScale_MPIBAIJ, 2568 MatNorm_MPIBAIJ, 2569 /*20*/ MatAssemblyBegin_MPIBAIJ, 2570 MatAssemblyEnd_MPIBAIJ, 2571 MatSetOption_MPIBAIJ, 2572 MatZeroEntries_MPIBAIJ, 2573 /*24*/ MatZeroRows_MPIBAIJ, 2574 0, 2575 0, 2576 0, 2577 0, 2578 /*29*/ MatSetUp_MPIBAIJ, 2579 0, 2580 0, 2581 0, 2582 0, 2583 /*34*/ MatDuplicate_MPIBAIJ, 2584 0, 2585 0, 2586 0, 2587 0, 2588 /*39*/ MatAXPY_MPIBAIJ, 2589 MatGetSubMatrices_MPIBAIJ, 2590 MatIncreaseOverlap_MPIBAIJ, 2591 MatGetValues_MPIBAIJ, 2592 MatCopy_MPIBAIJ, 2593 /*44*/ 0, 2594 MatScale_MPIBAIJ, 2595 0, 2596 0, 2597 MatZeroRowsColumns_MPIBAIJ, 2598 /*49*/ 0, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*54*/ MatFDColoringCreate_MPIXAIJ, 2604 0, 2605 MatSetUnfactored_MPIBAIJ, 2606 MatPermute_MPIBAIJ, 2607 MatSetValuesBlocked_MPIBAIJ, 2608 /*59*/ MatGetSubMatrix_MPIBAIJ, 2609 MatDestroy_MPIBAIJ, 2610 MatView_MPIBAIJ, 2611 0, 2612 0, 2613 /*64*/ 0, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2619 0, 2620 0, 2621 0, 2622 0, 2623 /*74*/ 0, 2624 MatFDColoringApply_BAIJ, 2625 0, 2626 0, 2627 0, 2628 /*79*/ 0, 2629 0, 2630 0, 2631 0, 2632 MatLoad_MPIBAIJ, 2633 /*84*/ 0, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*89*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*94*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*99*/ 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*104*/0, 2654 MatRealPart_MPIBAIJ, 2655 MatImaginaryPart_MPIBAIJ, 2656 0, 2657 0, 2658 /*109*/0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2664 0, 2665 MatGetGhosts_MPIBAIJ, 2666 0, 2667 0, 2668 /*119*/0, 2669 0, 2670 0, 2671 0, 2672 MatGetMultiProcBlock_MPIBAIJ, 2673 /*124*/0, 2674 MatGetColumnNorms_MPIBAIJ, 2675 MatInvertBlockDiagonal_MPIBAIJ, 2676 0, 2677 0, 2678 /*129*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 
/*134*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*139*/ 0, 2689 0, 2690 0, 2691 MatFDColoringSetUp_MPIXAIJ 2692 }; 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2696 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2697 { 2698 PetscFunctionBegin; 2699 *a = ((Mat_MPIBAIJ*)A->data)->A; 2700 PetscFunctionReturn(0); 2701 } 2702 2703 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2707 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2708 { 2709 PetscInt m,rstart,cstart,cend; 2710 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2711 const PetscInt *JJ =0; 2712 PetscScalar *values=0; 2713 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2718 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2719 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2720 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2721 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2722 m = B->rmap->n/bs; 2723 rstart = B->rmap->rstart/bs; 2724 cstart = B->cmap->rstart/bs; 2725 cend = B->cmap->rend/bs; 2726 2727 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2728 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2729 for (i=0; i<m; i++) { 2730 nz = ii[i+1] - ii[i]; 2731 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2732 nz_max = PetscMax(nz_max,nz); 2733 JJ = jj + ii[i]; 2734 for (j=0; j<nz; j++) { 2735 if (*JJ >= cstart) break; 2736 JJ++; 2737 } 2738 d = 0; 2739 for (; j<nz; j++) { 2740 if (*JJ++ >= cend) break; 2741 d++; 2742 } 2743 d_nnz[i] = d; 2744 o_nnz[i] = nz - d; 2745 } 2746 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2747 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2748 2749 values = (PetscScalar*)V; 2750 if (!values) { 2751 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2752 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2753 } 2754 for (i=0; i<m; i++) { 2755 PetscInt row = i + rstart; 2756 PetscInt ncols = ii[i+1] - ii[i]; 2757 const PetscInt *icols = jj + ii[i]; 2758 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2759 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2760 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2761 } else { /* block ordering does not match so we can only insert one block at a time. */ 2762 PetscInt j; 2763 for (j=0; j<ncols; j++) { 2764 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2765 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2766 } 2767 } 2768 } 2769 2770 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2771 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2772 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2773 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2779 /*@C 2780 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2781 (the default parallel PETSc format). 2782 2783 Collective on MPI_Comm 2784 2785 Input Parameters: 2786 + A - the matrix 2787 . bs - the block size 2788 . i - the indices into j for the start of each local row (starts with zero) 2789 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2790 - v - optional values in the matrix 2791 2792 Level: developer 2793 2794 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2795 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2796 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2797 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2798 block column and the second index is over columns within a block. 2799 2800 .keywords: matrix, aij, compressed row, sparse, parallel 2801 2802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2803 @*/ 2804 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2805 { 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2810 PetscValidType(B,1); 2811 PetscValidLogicalCollectiveInt(B,bs,2); 2812 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2818 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2819 { 2820 Mat_MPIBAIJ *b; 2821 PetscErrorCode ierr; 2822 PetscInt i; 2823 2824 PetscFunctionBegin; 2825 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2826 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2827 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2828 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2829 2830 if (d_nnz) { 2831 for (i=0; i<B->rmap->n/bs; i++) { 2832 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2833 } 2834 } 2835 if (o_nnz) { 2836 for (i=0; i<B->rmap->n/bs; i++) { 2837 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2838 } 2839 } 2840 2841 b = (Mat_MPIBAIJ*)B->data; 2842 b->bs2 = bs*bs; 2843 b->mbs = B->rmap->n/bs; 2844 b->nbs = B->cmap->n/bs; 2845 b->Mbs = B->rmap->N/bs; 2846 b->Nbs = B->cmap->N/bs; 2847 2848 for (i=0; i<=b->size; i++) { 2849 
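/* convert the point-row ownership ranges into block-row ranges */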
b->rangebs[i] = B->rmap->range[i]/bs; 2850 } 2851 b->rstartbs = B->rmap->rstart/bs; 2852 b->rendbs = B->rmap->rend/bs; 2853 b->cstartbs = B->cmap->rstart/bs; 2854 b->cendbs = B->cmap->rend/bs; 2855 2856 if (!B->preallocated) { 2857 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2858 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2859 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2861 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2862 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2863 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2864 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2865 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2866 } 2867 2868 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2869 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2870 B->preallocated = PETSC_TRUE; 2871 PetscFunctionReturn(0); 2872 } 2873 2874 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2875 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2876 2877 #undef __FUNCT__ 2878 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2879 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2880 { 2881 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2882 PetscErrorCode ierr; 2883 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2884 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2885 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2886 2887 PetscFunctionBegin; 2888 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2889 ii[0] = 0; 2890 for (i=0; i<M; i++) { 2891 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2892 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2893 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2894 /* remove one from the count if the matrix row has a diagonal entry */ 2895 for (j=id[i]; j<id[i+1]; j++) { 2896 if (jd[j] == i) {ii[i+1]--;break;} 2897 } 2898 } 2899 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2900 cnt = 0; 2901 for (i=0; i<M; i++) { 2902 for (j=io[i]; j<io[i+1]; j++) { 2903 if (garray[jo[j]] > rstart) break; 2904 jj[cnt++] = garray[jo[j]]; 2905 } 2906 for (k=id[i]; k<id[i+1]; k++) { 2907 if (jd[k] != i) { 2908 jj[cnt++] = rstart + jd[k]; 2909 } 2910 } 2911 for (; j<io[i+1]; j++) { 2912 jj[cnt++] = garray[jo[j]]; 2913 } 2914 } 2915 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2920 2921 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2925 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2926 { 2927 PetscErrorCode ierr; 2928 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2929 Mat B; 2930 Mat_MPIAIJ *b; 2931 2932 PetscFunctionBegin; 2933 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2934 2935 ierr =
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2936 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2937 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2938 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2939 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2940 b = (Mat_MPIAIJ*) B->data; 2941 2942 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2943 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2944 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2945 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2946 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2949 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2950 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 if (reuse == MAT_REUSE_MATRIX) { 2952 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2953 } else { 2954 *newmat = B; 2955 } 2956 PetscFunctionReturn(0); 2957 } 2958 2959 #if defined(PETSC_HAVE_MUMPS) 2960 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2961 #endif 2962 2963 /*MC 2964 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2965 2966 Options Database Keys: 2967 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2968 . -mat_block_size <bs> - set the blocksize used to store the matrix 2969 - -mat_use_hash_table <fact> 2970 2971 Level: beginner 2972 2973 .seealso: MatCreateMPIBAIJ 2974 M*/ 2975 2976 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2977 2978 #undef __FUNCT__ 2979 #define __FUNCT__ "MatCreate_MPIBAIJ" 2980 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2981 { 2982 Mat_MPIBAIJ *b; 2983 PetscErrorCode ierr; 2984 PetscBool flg; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2988 B->data = (void*)b; 2989 2990 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2991 B->assembled = PETSC_FALSE; 2992 2993 B->insertmode = NOT_SET_VALUES; 2994 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2995 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2996 2997 /* build local table of row and column ownerships */ 2998 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 2999 3000 /* build cache for off array entries formed */ 3001 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3002 3003 b->donotstash = PETSC_FALSE; 3004 b->colmap = NULL; 3005 b->garray = NULL; 3006 b->roworiented = PETSC_TRUE; 3007 3008 /* stuff used in block assembly */ 3009 b->barray = 0; 3010 3011 /* stuff used for matrix vector multiply */ 3012 b->lvec = 0; 3013 b->Mvctx = 0; 3014 3015 /* stuff for MatGetRow() */ 3016 b->rowindices = 0; 3017 b->rowvalues = 0; 3018 b->getrowactive = PETSC_FALSE; 3019 3020 /* hash table stuff */ 3021 b->ht = 0; 3022 b->hd = 0; 3023 b->ht_size = 0; 3024 b->ht_flag = PETSC_FALSE; 3025 b->ht_fact = 0; 3026 b->ht_total_ct = 0; 3027 b->ht_insert_ct = 0; 3028 3029 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3030 b->ijonly = PETSC_FALSE; 3031 3032 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3033 ierr = 
PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3034 if (flg) { 3035 PetscReal fact = 1.39; 3036 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3037 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3038 if (fact <= 1.0) fact = 1.39; 3039 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3040 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3041 } 3042 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3043 3044 #if defined(PETSC_HAVE_MUMPS) 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3046 #endif 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3058 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3059 PetscFunctionReturn(0); 3060 } 3061 3062 /*MC 3063 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3064 3065 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3066 and MATMPIBAIJ otherwise. 3067 3068 Options Database Keys: 3069 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3070 3071 Level: beginner 3072 3073 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3074 M*/ 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3078 /*@C 3079 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3080 (block compressed row). For good matrix assembly performance 3081 the user should preallocate the matrix storage by setting the parameters 3082 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3083 performance can be increased by more than a factor of 50. 3084 3085 Collective on Mat 3086 3087 Input Parameters: 3088 + A - the matrix 3089 . bs - size of block 3090 . d_nz - number of block nonzeros per block row in diagonal portion of local 3091 submatrix (same for all local rows) 3092 . 
d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
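
   Sample usage (a sketch only; the communicator, sizes, and the per-block-row estimates 5 and 2
   below are illustrative, not values prescribed by this routine):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   /* guess 5 diagonal and 2 off-diagonal block nonzeros per block row */
   ierr = MatMPIBAIJSetPreallocation(A,bs,5,NULL,2,NULL);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,5,NULL);CHKERRQ(ierr);  /* does nothing unless the type is MATSEQBAIJ */
.ve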

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
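
   For example, the following sketch creates a distributed BAIJ matrix with block size 3 from the
   global sizes alone (the per-block-row estimates 5 and 2 are illustrative, not mandated values):
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve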
3226 3227 Storage Information: 3228 For a square global matrix we define each processor's diagonal portion 3229 to be its local rows and the corresponding columns (a square submatrix); 3230 each processor's off-diagonal portion encompasses the remainder of the 3231 local matrix (a rectangular submatrix). 3232 3233 The user can specify preallocated storage for the diagonal part of 3234 the local submatrix with either d_nz or d_nnz (not both). Set 3235 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3236 memory allocation. Likewise, specify preallocated storage for the 3237 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3238 3239 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3240 the figure below we depict these three local rows and all columns (0-11). 3241 3242 .vb 3243 0 1 2 3 4 5 6 7 8 9 10 11 3244 -------------------------- 3245 row 3 |o o o d d d o o o o o o 3246 row 4 |o o o d d d o o o o o o 3247 row 5 |o o o d d d o o o o o o 3248 -------------------------- 3249 .ve 3250 3251 Thus, any entries in the d locations are stored in the d (diagonal) 3252 submatrix, and any entries in the o locations are stored in the 3253 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3254 stored simply in the MATSEQBAIJ format for compressed row storage. 3255 3256 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3257 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3258 In general, for PDE problems in which most nonzeros are near the diagonal, 3259 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3260 or you will get TERRIBLE performance; see the users' manual chapter on 3261 matrices. 3262 3263 Level: intermediate 3264 3265 .keywords: matrix, block, aij, compressed row, sparse, parallel 3266 3267 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3268 @*/ 3269 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3270 { 3271 PetscErrorCode ierr; 3272 PetscMPIInt size; 3273 3274 PetscFunctionBegin; 3275 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3276 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3277 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3278 if (size > 1) { 3279 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3280 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3281 } else { 3282 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3283 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3284 } 3285 PetscFunctionReturn(0); 3286 } 3287 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3290 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3291 { 3292 Mat mat; 3293 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3294 PetscErrorCode ierr; 3295 PetscInt len=0; 3296 3297 PetscFunctionBegin; 3298 *newmat = 0; 3299 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3300 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3301 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3302 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3303 3304 mat->factortype = matin->factortype; 3305 mat->preallocated = 
PETSC_TRUE; 3306 mat->assembled = PETSC_TRUE; 3307 mat->insertmode = NOT_SET_VALUES; 3308 3309 a = (Mat_MPIBAIJ*)mat->data; 3310 mat->rmap->bs = matin->rmap->bs; 3311 a->bs2 = oldmat->bs2; 3312 a->mbs = oldmat->mbs; 3313 a->nbs = oldmat->nbs; 3314 a->Mbs = oldmat->Mbs; 3315 a->Nbs = oldmat->Nbs; 3316 3317 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3318 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3319 3320 a->size = oldmat->size; 3321 a->rank = oldmat->rank; 3322 a->donotstash = oldmat->donotstash; 3323 a->roworiented = oldmat->roworiented; 3324 a->rowindices = 0; 3325 a->rowvalues = 0; 3326 a->getrowactive = PETSC_FALSE; 3327 a->barray = 0; 3328 a->rstartbs = oldmat->rstartbs; 3329 a->rendbs = oldmat->rendbs; 3330 a->cstartbs = oldmat->cstartbs; 3331 a->cendbs = oldmat->cendbs; 3332 3333 /* hash table stuff */ 3334 a->ht = 0; 3335 a->hd = 0; 3336 a->ht_size = 0; 3337 a->ht_flag = oldmat->ht_flag; 3338 a->ht_fact = oldmat->ht_fact; 3339 a->ht_total_ct = 0; 3340 a->ht_insert_ct = 0; 3341 3342 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3343 if (oldmat->colmap) { 3344 #if defined(PETSC_USE_CTABLE) 3345 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3346 #else 3347 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3348 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3349 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3350 #endif 3351 } else a->colmap = 0; 3352 3353 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3354 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3355 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3356 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3357 } else a->garray = 0; 3358 3359 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3360 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3361 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3362 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3363 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3364 3365 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3366 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3367 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3368 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3369 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3370 *newmat = mat; 3371 PetscFunctionReturn(0); 3372 } 3373 3374 #undef __FUNCT__ 3375 #define __FUNCT__ "MatLoad_MPIBAIJ" 3376 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3377 { 3378 PetscErrorCode ierr; 3379 int fd; 3380 PetscInt i,nz,j,rstart,rend; 3381 PetscScalar *vals,*buf; 3382 MPI_Comm comm; 3383 MPI_Status status; 3384 PetscMPIInt rank,size,maxnz; 3385 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3386 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3387 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3388 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3389 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3390 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3391 
3392 PetscFunctionBegin; 3393 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3394 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3395 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3396 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3397 3398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3399 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3400 if (!rank) { 3401 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3402 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3403 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3404 } 3405 3406 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3407 3408 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3409 M = header[1]; N = header[2]; 3410 3411 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3412 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3413 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3414 3415 /* If global sizes are set, check if they are consistent with that given in the file */ 3416 if (sizesset) { 3417 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3418 } 3419 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3420 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3421 3422 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3423 3424 /* 3425 This code adds extra rows to make sure the number of rows is 3426 divisible by the blocksize 3427 */ 3428 Mbs = M/bs; 3429 extra_rows = bs - M + bs*Mbs; 3430 if (extra_rows == bs) extra_rows = 0; 3431 else Mbs++; 3432 if (extra_rows && !rank) { 3433 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3434 } 3435 3436 /* determine ownership of all rows */ 3437 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3438 mbs = Mbs/size + ((Mbs % size) > rank); 3439 m = mbs*bs; 3440 } else { /* User set */ 3441 m = newmat->rmap->n; 3442 mbs = m/bs; 3443 } 3444 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3445 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3446 3447 /* process 0 needs enough room for process with most rows */ 3448 if (!rank) { 3449 mmax = rowners[1]; 3450 for (i=2; i<=size; i++) { 3451 mmax = PetscMax(mmax,rowners[i]); 3452 } 3453 mmax*=bs; 3454 } else mmax = -1; /* unused, but compiler warns anyway */ 3455 3456 rowners[0] = 0; 3457 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3458 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3459 rstart = rowners[rank]; 3460 rend = rowners[rank+1]; 3461 3462 /* distribute row lengths to all processors */ 3463 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3464 if (!rank) { 3465 mend = m; 3466 if (size == 1) mend = mend - extra_rows; 3467 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3468 for (j=mend; j<m; j++) locrowlens[j] = 1; 3469 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3470 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3471 for (j=0; j<m; 
j++) { 3472 procsnz[0] += locrowlens[j]; 3473 } 3474 for (i=1; i<size; i++) { 3475 mend = browners[i+1] - browners[i]; 3476 if (i == size-1) mend = mend - extra_rows; 3477 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3478 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3479 /* calculate the number of nonzeros on each processor */ 3480 for (j=0; j<browners[i+1]-browners[i]; j++) { 3481 procsnz[i] += rowlengths[j]; 3482 } 3483 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3486 } else { 3487 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3488 } 3489 3490 if (!rank) { 3491 /* determine max buffer needed and allocate it */ 3492 maxnz = procsnz[0]; 3493 for (i=1; i<size; i++) { 3494 maxnz = PetscMax(maxnz,procsnz[i]); 3495 } 3496 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3497 3498 /* read in my part of the matrix column indices */ 3499 nz = procsnz[0]; 3500 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3501 mycols = ibuf; 3502 if (size == 1) nz -= extra_rows; 3503 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3504 if (size == 1) { 3505 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3506 } 3507 3508 /* read in every ones (except the last) and ship off */ 3509 for (i=1; i<size-1; i++) { 3510 nz = procsnz[i]; 3511 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3512 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3513 } 3514 /* read in the stuff for the last proc */ 3515 if (size != 1) { 3516 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3517 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3518 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3519 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3520 } 3521 ierr = PetscFree(cols);CHKERRQ(ierr); 3522 } else { 3523 /* determine buffer space needed for message */ 3524 nz = 0; 3525 for (i=0; i<m; i++) { 3526 nz += locrowlens[i]; 3527 } 3528 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3529 mycols = ibuf; 3530 /* receive message of column indices*/ 3531 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3532 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3533 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3534 } 3535 3536 /* loop over local rows, determining number of off diagonal entries */ 3537 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3538 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3539 rowcount = 0; nzcount = 0; 3540 for (i=0; i<mbs; i++) { 3541 dcount = 0; 3542 odcount = 0; 3543 for (j=0; j<bs; j++) { 3544 kmax = locrowlens[rowcount]; 3545 for (k=0; k<kmax; k++) { 3546 tmp = mycols[nzcount++]/bs; 3547 if (!mask[tmp]) { 3548 mask[tmp] = 1; 3549 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3550 else masked1[dcount++] = tmp; 3551 } 3552 } 3553 rowcount++; 3554 } 3555 3556 dlens[i] = dcount; 3557 odlens[i] = odcount; 3558 3559 /* zero out the mask elements we set */ 3560 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3561 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3562 } 3563 3564 3565 if (!sizesset) { 3566 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3567 } 3568 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3569 3570 if (!rank) { 3571 ierr = 
PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3572 /* read in my part of the matrix numerical values */ 3573 nz = procsnz[0]; 3574 vals = buf; 3575 mycols = ibuf; 3576 if (size == 1) nz -= extra_rows; 3577 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3578 if (size == 1) { 3579 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3580 } 3581 3582 /* insert into matrix */ 3583 jj = rstart*bs; 3584 for (i=0; i<m; i++) { 3585 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3586 mycols += locrowlens[i]; 3587 vals += locrowlens[i]; 3588 jj++; 3589 } 3590 /* read in other processors (except the last one) and ship out */ 3591 for (i=1; i<size-1; i++) { 3592 nz = procsnz[i]; 3593 vals = buf; 3594 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3595 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3596 } 3597 /* the last proc */ 3598 if (size != 1) { 3599 nz = procsnz[i] - extra_rows; 3600 vals = buf; 3601 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3602 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3603 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3604 } 3605 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3606 } else { 3607 /* receive numeric values */ 3608 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3609 3610 /* receive message of values*/ 3611 vals = buf; 3612 mycols = ibuf; 3613 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3614 3615 /* insert into matrix */ 3616 jj = rstart*bs; 3617 for (i=0; i<m; i++) { 3618 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3619 mycols += locrowlens[i]; 3620 vals += locrowlens[i]; 3621 jj++; 3622 } 3623 } 3624 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3625 ierr = PetscFree(buf);CHKERRQ(ierr); 3626 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3627 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3628 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3629 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3630 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 PetscFunctionReturn(0); 3633 } 3634 3635 #undef __FUNCT__ 3636 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3637 /*@ 3638 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3639 3640 Input Parameters: 3641 . mat - the matrix 3642 . 
fact - the factor used to compute the size of the hash table

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
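
  From C the same operation is obtained with the usual call (a sketch; this Fortran binding exists
  only to eliminate the extra call overhead and receives each scalar argument by reference):
.vb
  ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve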
3715 3716 Level: advanced 3717 3718 .seealso: MatSetValuesBlocked() 3719 @*/ 3720 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3721 { 3722 /* convert input arguments to C version */ 3723 Mat mat = *matin; 3724 PetscInt m = *min, n = *nin; 3725 InsertMode addv = *addvin; 3726 3727 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3728 const MatScalar *value; 3729 MatScalar *barray = baij->barray; 3730 PetscBool roworiented = baij->roworiented; 3731 PetscErrorCode ierr; 3732 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3733 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3734 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3735 3736 PetscFunctionBegin; 3737 /* tasks normally handled by MatSetValuesBlocked() */ 3738 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3739 #if defined(PETSC_USE_DEBUG) 3740 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3741 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3742 #endif 3743 if (mat->assembled) { 3744 mat->was_assembled = PETSC_TRUE; 3745 mat->assembled = PETSC_FALSE; 3746 } 3747 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3748 3749 3750 if (!barray) { 3751 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3752 baij->barray = barray; 3753 } 3754 3755 if (roworiented) stepval = (n-1)*bs; 3756 else stepval = (m-1)*bs; 3757 3758 for (i=0; i<m; i++) { 3759 if (im[i] < 0) continue; 3760 #if defined(PETSC_USE_DEBUG) 3761 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3762 #endif 3763 if (im[i] >= rstart && im[i] < rend) { 3764 row = im[i] - rstart; 3765 for (j=0; j<n; j++) { 3766 /* If NumCol = 1 then a copy is not required */ 3767 if ((roworiented) && (n == 1)) { 3768 barray = (MatScalar*)v + i*bs2; 3769 } else if ((!roworiented) && (m == 1)) { 3770 barray = (MatScalar*)v + j*bs2; 3771 } else { /* Here a copy is required */ 3772 if (roworiented) { 3773 value = v + i*(stepval+bs)*bs + j*bs; 3774 } else { 3775 value = v + j*(stepval+bs)*bs + i*bs; 3776 } 3777 for (ii=0; ii<bs; ii++,value+=stepval) { 3778 for (jj=0; jj<bs; jj++) { 3779 *barray++ = *value++; 3780 } 3781 } 3782 barray -=bs2; 3783 } 3784 3785 if (in[j] >= cstart && in[j] < cend) { 3786 col = in[j] - cstart; 3787 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3788 } else if (in[j] < 0) continue; 3789 #if defined(PETSC_USE_DEBUG) 3790 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3791 #endif 3792 else { 3793 if (mat->was_assembled) { 3794 if (!baij->colmap) { 3795 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3796 } 3797 3798 #if defined(PETSC_USE_DEBUG) 3799 #if defined(PETSC_USE_CTABLE) 3800 { PetscInt data; 3801 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3802 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3803 } 3804 #else 3805 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3806 #endif 3807 #endif 3808 #if defined(PETSC_USE_CTABLE) 3809 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3810 col = (col - 1)/bs; 3811 #else 3812 col = (baij->colmap[in[j]] - 1)/bs; 3813 
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
       in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
     the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
     block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
     with column-major ordering within blocks.

     The i and j indices are 0 based; the entries of i are offsets into the local j array.
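
     As a sketch (with bs = 1 and made-up values), a process whose first local row holds the entries
     (0,1.0) and (2,2.0) and whose second local row holds (1,3.0) would pass
.vb
     m = 2,   i = {0,2,3},   j = {0,2,1},   a = {1.0,2.0,3.0}
.ve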

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);   /* this routine builds a BAIJ matrix, not an SBAIJ one */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}