#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar()
   does not exist: the ..._MatScalar() names below are simply #defined to the
   regular functions.  Otherwise ..._MatScalar() takes matrix elements already in
   single precision and inserts them into the single precision data structures,
   while MatSetValuesBlocked_MPIBAIJ() converts the entries to single precision
   and then calls ..._MatScalar() to put them into the single precision data
   structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
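/*
   Illustrative sketch (not part of the library): with PETSC_USE_MAT_SINGLE the
   public interface still receives PetscScalar (double precision) values, so the
   MatSetValues variants below stage the input into a MatScalar (single
   precision) buffer before calling the corresponding ..._MatScalar() worker.
   The staging step is essentially the following, with a hypothetical name:
*/
#if 0
static void ExampleStageToSingle(const PetscScalar *v,MatScalar *vsingle,int N)
{
  int i;
  for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i];  /* narrowing copy */
}
#endif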
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
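/*
   Illustrative sketch (not part of the library): colmap maps a *global block
   column* to 1 + the starting *local point column* of that block in B; 0 means
   the block column is not present locally.  A lookup for a global point column
   gcol therefore looks like the following (array version, hypothetical name):
*/
#if 0
static int ExampleColmapLookup(Mat_MPIBAIJ *baij,int bs,int gcol)
{
  int col = baij->colmap[gcol/bs] - 1;  /* local start of the block, or -1 */
  if (col < 0) return -1;               /* block column absent on this process */
  return col + gcol%bs;                 /* offset within the bs-wide block */
}
#endif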
#define CHUNKSIZE  10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}
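/*
   Illustrative note (not part of the library): the insertion macro above works
   on one block row at a time.  rp points at the sorted block-column indices of
   block row brow and ap at the corresponding bs x bs blocks, stored column-major;
   the while loop narrows the search interval by bisection and the for loop
   finishes with a linear scan, so either an existing block (row,col) is found
   or the insertion point _i is known.  Under hypothetical values bs = 2,
   row = 5, col = 7:

     brow = 5/2 = 2,  bcol = 7/2 = 3,  ridx = 5%2 = 1,  cidx = 7%2 = 1,

   so the scalar lands at ap[bs2*_i + bs*cidx + ridx] = ap[4*_i + 3] of the
   block whose column index satisfies rp[_i] == 3.  If the row is full, a new
   array enlarged by CHUNKSIZE blocks is allocated and the old data copied over.
   MatSetValues_SeqBAIJ_B_Private() below is identical except that it targets
   the off-diagonal part b/B.
*/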
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int            i,j,row,col;
  int            rstart_orig=baij->rstart_bs;
  int            rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int            cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B  = baij->B;
              b  = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
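/*
   Illustrative note (not part of the library): MatSetValues above receives
   *point* (unblocked) indices, so ownership is tested against the point ranges
   rstart_bs..rend_bs.  A locally owned entry is routed to the diagonal block A
   when its column also lies in cstart_bs..cend_bs, to the off-diagonal block B
   otherwise, and anything in a non-local row is queued in the stash for
   MatAssemblyBegin/End to ship to the owning process.  Schematically:

     if (rstart_bs <= im[i] && im[i] < rend_bs) {
       if (cstart_bs <= in[j] && in[j] < cend_bs)   -> A (shifted local indices)
       else                                         -> B (via colmap/garray)
     } else                                         -> stash
*/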
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  int             i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is
           already contiguous in v, so a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
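/*
   Illustrative note (not part of the library): HASH() is the classical
   multiplicative hash.  key*0.6180339887 (the golden-ratio fraction) is
   computed, its fractional part taken, and scaled to a table slot.  For
   example, with size = 1000 and key = 2049:

     tmp  = 2049*0.6180339887 = 1266.3516...
     slot = (int)(1000*(tmp - 1266)) = 351

   Collisions are resolved by the linear probing done in the functions below.
*/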
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int            i,j,row,col;
  int            rstart_orig=baij->rstart_bs;
  int            rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int            h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int            total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  int             i,j,ii,jj,row,col;
  int             rstart=baij->rstart;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int            bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            i,bs2=baij->bs2;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Creates the hash table, and sets its entries.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int            i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  int            size,bs2=baij->bs2,rstart=baij->rstart;
  int            cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int            *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_BOPT_g)
  int            ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
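/*
   Illustrative sketch (not part of the library): the table built above holds,
   for every stored block, the key row*Nbs+col+1 and a pointer to the block's
   values; key 0 marks an empty slot (hence the +1).  Lookup, as done by the
   _HT_ MatSetValues variants, probes linearly with wrap-around from the
   hashed slot:
*/
#if 0
static MatScalar *ExampleHashLookup(int *HT,MatScalar **HD,int size,int key)
{
  PetscReal tmp;
  int       idx,h1 = HASH(size,key,tmp);
  for (idx=h1; idx<size; idx++) if (HT[idx] == key) return HD[idx];  /* probe to end */
  for (idx=0; idx<h1; idx++)    if (HT[idx] == key) return HD[idx];  /* wrap around  */
  return PETSC_NULL;                                                 /* not present  */
}
#endif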
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
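/*
   Illustrative note (not part of the library): the MPI_BOR reduction above is
   a cheap consistency check.  NOT_SET_VALUES, INSERT_VALUES and ADD_VALUES
   have distinct bit patterns, so if some processes inserted while others
   added, the OR of all insert modes equals (ADD_VALUES|INSERT_VALUES) and the
   error is raised; a process that set no values contributes NOT_SET_VALUES
   (zero) and does not disturb the result.
*/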
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  int            *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the row-oriented flags to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
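/*
   Illustrative note (not part of the library): the stash delivers the received
   (row,col,val) triples sorted by row, so the loops above batch every run of
   equal row indices into one MatSetValues call.  For a hypothetical received
   row sequence {4,4,4,9,9} the batching proceeds as:

     i=0: rows 4,4,4  -> ncols=3, one call for row 4, then i=3
     i=3: rows 9,9    -> ncols=2, one call for row 9, then i=5 and the loop ends
*/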
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  int               bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
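/*
   Illustrative note (not part of the library): MatMult above uses the standard
   overlap pattern: start the scatter of the needed off-process entries of xx
   into lvec, multiply by the diagonal block A while the messages are in
   flight, finish the scatter, then accumulate the off-diagonal contribution
   B*lvec into yy.  Schematically:

     VecScatterBegin(xx -> lvec)     (communication starts)
     yy  = A * xx                    (local work hides the latency)
     VecScatterEnd(xx -> lvec)       (communication completes)
     yy += B * lvec
*/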
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always be */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always be */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int            *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
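/*
   Illustrative note (not part of the library): MatGetRow above merges the row
   of A (global columns cstart*bs .. cend*bs-1) into the middle of the row of
   B, whose column indices, translated through garray, are sorted and split
   into a part left of the diagonal block and a part right of it.  With
   hypothetical data bs=1, cstart=10, translated B columns {2,7,25} and local
   A columns {3,6} (global 13,16), the assembled row is

     global columns:  2, 7, 13, 16, 25    (B,B,A,A,B)

   where imark=2 counts the B entries preceding the diagonal block.
*/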
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* use the parallel matrix (not the local diagonal block A) for the sizes */
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
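/*
   Illustrative sketch (not part of the library): MAT_USE_HASH_TABLE only sets
   a flag; the table itself is built at the end of the first final assembly
   (see MatAssemblyEnd above), after which MatSetValues goes through the _HT
   variants.  A typical (hypothetical) usage would be:
*/
#if 0
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
  /* ... MatSetValues(...) calls; the first final assembly builds the table ... */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* subsequent MatSetValues(...) on existing nonzeros go through the hash table */
#endif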
MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1596 { 1597 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1598 Mat_SeqBAIJ *Aloc; 1599 Mat B; 1600 PetscErrorCode ierr; 1601 int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1602 int bs=baij->bs,mbs=baij->mbs; 1603 MatScalar *a; 1604 1605 PetscFunctionBegin; 1606 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1607 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1608 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1609 ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1610 1611 /* copy over the A part */ 1612 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1613 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1614 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1615 1616 for (i=0; i<mbs; i++) { 1617 rvals[0] = bs*(baij->rstart + i); 1618 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1619 for (j=ai[i]; j<ai[i+1]; j++) { 1620 col = (baij->cstart+aj[j])*bs; 1621 for (k=0; k<bs; k++) { 1622 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1623 col++; a += bs; 1624 } 1625 } 1626 } 1627 /* copy over the B part */ 1628 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1629 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1630 for (i=0; i<mbs; i++) { 1631 rvals[0] = bs*(baij->rstart + i); 1632 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1633 for (j=ai[i]; j<ai[i+1]; j++) { 1634 col = baij->garray[aj[j]]*bs; 1635 for (k=0; k<bs; k++) { 1636 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1637 col++; a += bs; 1638 } 1639 } 1640 } 1641 ierr = PetscFree(rvals);CHKERRQ(ierr); 1642 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 1645 if (matout) { 1646 *matout = B; 1647 } else { 1648 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1656 { 1657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1658 Mat a = baij->A,b = baij->B; 1659 PetscErrorCode ierr; 1660 int s1,s2,s3; 1661 1662 PetscFunctionBegin; 1663 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1664 if (rr) { 1665 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1666 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1667 /* Overlap communication with computation. 
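       VecScatterBegin() posts the messages that gather the off-process
       entries of rr into baij->lvec; the left scaling of the off-diagonal
       block and the two-sided scaling of the diagonal block proceed while
       those messages are in flight, and VecScatterEnd() below completes
       the gather just before the off-diagonal block is right scaled.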
    */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIBAIJ"
PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  MPI_Comm       comm = A->comm;
  static int     called = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
/*45*/ MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIBAIJ
};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  int            i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];

  ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
. -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
   block AIJ format (block compressed row). For good matrix assembly performance
   the user should set the parameters d_nz (or d_nnz) and o_nz (or o_nnz)
   accurately; doing so can increase performance by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
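
   For example (an illustrative sketch; comm and A are assumed to be declared
   by the caller), a square matrix with 2x2 blocks and 1000 global rows and
   columns, letting PETSc choose the local sizes and preallocating 5 diagonal
   and 2 off-diagonal block nonzeros per block row:
.vb
      ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,1000,1000,
                              5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve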

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
@*/
PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  int            len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
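  /* the duplicate gets its own off-process work vector (created above) and,
     below, its own copy of the scatter context, so that its MatMult()
     communication is independent of the original matrix */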
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  int            i,nz,j,rstart,rend,fd;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  int            header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int            *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int            tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int            *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int            dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs*bs;
  ierr     = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens  = dlens + (rend-rstart);
  ierr    = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount;  j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* Why isn't this done as ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */
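  /* each MatSetValues() call below passes one row's column indices exactly as
     stored in the file, in increasing order, so declaring the columns sorted
     lets the insertion routines take their faster sorted path */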
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values for all other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the
   hash table employed during matrix assembly when MAT_USE_HASH_TABLE is set.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table size factor; should be greater than 1.0 (the
          -mat_use_hash_table option uses 1.39 by default)

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
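
/*
   Example of accessing the pieces of a MATMPIBAIJ matrix with
   MatMPIBAIJGetSeqBAIJ() (an illustrative sketch; A is assumed to be an
   assembled matrix of type MATMPIBAIJ):

.vb
      Mat Ad,Ao;
      int *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve

   Ad is the local diagonal block and Ao the local off-diagonal block, both in
   MATSEQBAIJ format; colmap[j] gives the global block column corresponding to
   block column j of Ao.
*/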