/*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "src/vec/vecimpl.h"
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist.  Otherwise the ..._MatScalar() routines take matrix elements already in single
   precision and insert them directly into the single-precision data structures, while
   MatSetValuesBlocked_MPISBAIJ() first converts the entries to single precision and then
   calls ..._MatScalar() to put them into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
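/*
   Illustrative usage sketch: MatStoreValues()/MatRetrieveValues() let an
   application save and later restore the numerical values of a matrix whose
   nonzero pattern is reused, assuming a matrix A that has already been
   assembled with MAT_NO_NEW_NONZERO_LOCATIONS set:

      ierr = MatStoreValues(A);CHKERRQ(ierr);      save the current entries
      ...                                          modify entries in place
      ierr = MatRetrieveValues(A);CHKERRQ(ierr);   restore the saved entries

   The MPISBAIJ versions above simply forward the operation to the local
   diagonal block (aij->A) and the off-diagonal block (aij->B).
*/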
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  This is done in a non-scalable way, since the
  length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
static int CreateColmap_MPISBAIJ_Private(Mat mat)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
      ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                         aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
      rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      a->s_maxnz += bs2*CHUNKSIZE; \
      a->reallocs++; \
      a->s_nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}
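/*
   Note on the macro above: each bs x bs block is stored densely in
   column-major order, so the scalar entry (ridx,cidx) of block number _i of
   a block row lives at

      ap[bs2*_i + bs*cidx + ridx],   where bs2 = bs*bs.

   For example, with bs = 2 the entry (1,0) of the third block (_i = 2) of a
   row is found at ap[4*2 + 2*0 + 1] = ap[9].
*/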
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
      MatScalar *new_a; \
 \
      if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + bs2*new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
      ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                         ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
      rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
      b->maxnz += bs2*CHUNKSIZE; \
      b->reallocs++; \
      b->nz++; \
    } \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n*b->bs2;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif
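/*
   Illustrative usage sketch: since only the upper triangle is stored, a
   caller assembling an MPISBAIJ matrix need only supply entries whose block
   row is <= the block column; anything below the block diagonal is silently
   dropped by MatSetValues_MPISBAIJ_MatScalar() below.  A minimal sketch,
   assuming an MPISBAIJ matrix A with block size 1:

      int         row = 3,cols[2];
      PetscScalar vals[2];
      cols[0] = 3; vals[0] = 4.0;      on/above the diagonal: kept
      cols[1] = 7; vals[1] = -1.0;
      ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/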
/* Only add/insert blocks a(i,j) with i<=j.
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscTruth   roworiented = baij->roworiented;
  int          ierr,i,j,row,col;
  int          rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc,*in_loc = 0;
  MatScalar    *v_loc = 0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower-triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;          /* ignore lower-triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqSBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue;  /* ignore lower-triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
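/*
   Note on HASH() above: this is multiplicative hashing with the golden-ratio
   constant; a key is mapped into [0,size) through the fractional part of
   key*0.6180339887:

      tmp  = key*HASH_KEY;
      slot = (int)(size*(tmp - (int)tmp));

   For example, size = 100 and key = 7 give tmp = 4.3262..., so the slot is
   (int)(100*0.3262...) = 32.
*/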
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;   /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;   /* square of norm(B) */
      /*
       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
       PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
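/*
   Note on the Frobenius norm above: only the upper triangle of the matrix is
   stored, and each entry of the off-diagonal block B stands for a symmetric
   pair of entries of the full matrix, so the local contributions are combined
   as (a sketch of the reasoning, per process p)

      ||A||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is exactly the sqrt(sum[0] + 2*sum[1]) computed after the reduction;
   the diagonal block A_p is assumed to be handled symmetrically by the
   SeqSBAIJ MatNorm() it calls.
*/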
/*
     Creates the hash table, and sets the table.
     This table is created only once.
     If new entries need to be added to the matrix
     then the hash table has to be destroyed and
     recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
       PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
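/*
   Illustrative usage sketch: the standard sequence that drives
   MatAssemblyBegin_MPISBAIJ()/MatAssemblyEnd_MPISBAIJ() above; values set for
   rows owned by other processes are stashed and communicated between the two
   calls.  Here rstart, rend, nc, cols, and vals are placeholders for the
   caller's data:

      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/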
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
             rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
             baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)    {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray)    {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd)        {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
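/*
   Note on the multiply routines above: since only the upper triangle is
   stored, y = A*x is accumulated from three pieces (a rough sketch):

      y  = A_d * x          local diagonal block (symmetric kernel)
      y += B * x_ghost      upper off-diagonal blocks
      y += B^T * x          their mirrored subdiagonal counterparts

   MatMult_MPISBAIJ() packs x and the locally computed B^T*x into slvec0 and
   uses the single scatter sMvctx to deliver both the ghost values and the
   remote B^T contributions in slvec1; the _2comm variant obtains the same
   result with two scatters on Mvctx.
*/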
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
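/*
   Illustrative usage sketch: MatGetRow()/MatRestoreRow() on an MPISBAIJ
   matrix work only for locally owned rows, and each MatGetRow() must be
   paired with a MatRestoreRow() before the next row is requested:

      int         nz,*cols;
      PetscScalar *vals;
      ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ...                              read nz, cols[], vals[]
      ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/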
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
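/*
   Illustrative usage sketch: typical options routed through
   MatSetOption_MPISBAIJ() above:

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);    no stashing
      ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);   freeze the pattern
      ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);             hash-table assembly
*/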
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat          a = baij->A,b = baij->B;
  int          ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

    /* left scale the off-diagonal block (ll == rr here) */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
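/*
   Note on MatDiagonalScale_MPISBAIJ() above: only symmetric scaling is
   supported, ll == rr, so that the result D*A*D remains symmetric; passing
   distinct left and right vectors raises an error.  A minimal sketch,
   assuming a conforming vector d:

      ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);    replaces A by D*A*D
*/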
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPISBAIJ"
int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,idx,nsends,*work,row;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and trigger many mallocs, slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
int MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm     comm = A->comm;
  static int   called = 0;
  int          ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
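/*
   Illustrative usage sketch: MatZeroRows_MPISBAIJ() above zeros a set of
   global rows (which may be owned by any process) and optionally places diag
   on the diagonal, e.g. to impose Dirichlet boundary conditions.  Here nrows
   and rows are placeholders for the caller's row list:

      IS          is;
      PetscScalar one = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/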
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
int MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat          a,b,c,d;
  PetscTruth   flg;
  int          ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
int MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
  MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatRelax_MPISBAIJ,
  MatTranspose_MPISBAIJ,
  MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
  MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
  MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPISBAIJ,
  MatIncreaseOverlap_MPISBAIJ,
  MatGetValues_MPISBAIJ,
  0,
  MatPrintHelp_MPISBAIJ,
  MatScale_MPISBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPISBAIJ,
  0,
  MatSetValuesBlocked_MPISBAIJ,
  0,
  0,
  0,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPISBAIJ};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
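/*
   Illustrative usage sketch: the usual way to obtain a matrix of this type,
   assuming block size bs, global sizes M and N, and preallocation parameters
   d_nz and o_nz supplied by the caller:

      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,
                               M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);

   which reaches MatCreate_MPISBAIJ() below through the type registry and then
   MatMPISBAIJSetPreallocation().
*/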
"MatCreate_MPISBAIJ" 1621 int MatCreate_MPISBAIJ(Mat B) 1622 { 1623 Mat_MPISBAIJ *b; 1624 int ierr; 1625 PetscTruth flg; 1626 1627 PetscFunctionBegin; 1628 1629 ierr = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1630 B->data = (void*)b; 1631 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1632 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1633 1634 B->ops->destroy = MatDestroy_MPISBAIJ; 1635 B->ops->view = MatView_MPISBAIJ; 1636 B->mapping = 0; 1637 B->factor = 0; 1638 B->assembled = PETSC_FALSE; 1639 1640 B->insertmode = NOT_SET_VALUES; 1641 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1642 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 1643 1644 /* build local table of row and column ownerships */ 1645 ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1646 b->cowners = b->rowners + b->size + 2; 1647 b->rowners_bs = b->cowners + b->size + 2; 1648 PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1649 1650 /* build cache for off array entries formed */ 1651 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1652 b->donotstash = PETSC_FALSE; 1653 b->colmap = PETSC_NULL; 1654 b->garray = PETSC_NULL; 1655 b->roworiented = PETSC_TRUE; 1656 1657 #if defined(PETSC_USE_MAT_SINGLE) 1658 /* stuff for MatSetValues_XXX in single precision */ 1659 b->setvalueslen = 0; 1660 b->setvaluescopy = PETSC_NULL; 1661 #endif 1662 1663 /* stuff used in block assembly */ 1664 b->barray = 0; 1665 1666 /* stuff used for matrix vector multiply */ 1667 b->lvec = 0; 1668 b->Mvctx = 0; 1669 b->slvec0 = 0; 1670 b->slvec0b = 0; 1671 b->slvec1 = 0; 1672 b->slvec1a = 0; 1673 b->slvec1b = 0; 1674 b->sMvctx = 0; 1675 1676 /* stuff for MatGetRow() */ 1677 b->rowindices = 0; 1678 b->rowvalues = 0; 1679 b->getrowactive = PETSC_FALSE; 1680 1681 /* hash table stuff */ 1682 b->ht = 0; 1683 b->hd = 0; 1684 b->ht_size = 0; 1685 b->ht_flag = PETSC_FALSE; 1686 b->ht_fact = 0; 1687 b->ht_total_ct = 0; 1688 b->ht_insert_ct = 0; 1689 1690 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1691 if (flg) { 1692 PetscReal fact = 1.39; 1693 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1694 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1695 if (fact <= 1.0) fact = 1.39; 1696 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1697 PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1698 } 1699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1700 "MatStoreValues_MPISBAIJ", 1701 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1703 "MatRetrieveValues_MPISBAIJ", 1704 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1706 "MatGetDiagonalBlock_MPISBAIJ", 1707 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 EXTERN_C_END 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1714 /*@C 1715 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1716 the user should preallocate the matrix storage by setting the parameters 1717 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1718 performance can be increased by more than a factor of 50. 1719 1720 Collective on Mat 1721 1722 Input Parameters: 1723 + A - the matrix 1724 . 
.  bs - the block size
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
    row 3  | o o o d d d o o o o o o
    row 4  | o o o d d d o o o o o o
    row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
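
   Sample usage (a sketch only, assuming comm, the local/global sizes m,n,M,N,
   and ierr are already set up; the preallocation values 3 and 1 are purely
   illustrative and should be derived from the application's nonzero pattern):

.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve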

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPISBAIJ *b;
  int          ierr,i,mbs,Mbs;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
  }

  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local 
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the 
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor 
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
    row 3  | o o o d d d o o o o o o
    row 4  | o o o d d d o o o o o o
    row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.
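
   As a purely illustrative example of how the figure maps onto the
   preallocation arrays (assuming bs=1 and that every location depicted
   above held a stored nonzero), this processor would pass

.vb
   d_nnz[] = {3,3,3}    and    o_nnz[] = {9,9,9}
.ve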

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  int          ierr,len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  a = (Mat_MPISBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /* 
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs      = Mbs/size + ((Mbs % size) > rank);
  m        = mbs*bs;
  ierr     = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for all other processors (except the last) and send them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens  = dlens + (rend-rstart);
  ierr    = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for all other processors (except the last one) and send them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
+  mat - the matrix
-  fact - the hash table expansion factor; must be greater than 1.0 (1.39 is
          used by default)

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPISBAIJ"
int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal    atmp;
  PetscReal    *work,*svalues,*rvalues;
  int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  int          rank,size,*rowners_bs,dest,count,source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status   stat;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);

  bs  = a->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;
  /*
  PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
  PetscSynchronizedFlush(PETSC_COMM_WORLD);
  */

  /* find ownerships */
  rowners_bs = a->rowners_bs;
  /*
  if (!rank){
    for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
  }
  */

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1){
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++){
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++){
          col  = bcol + kcol;        /* local col index */
          col += rowners_bs[rank+1]; /* global col index */
          for (krow=0; krow<bs; krow++){
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;      /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++){
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank){
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++){
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++){
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
      /*
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
      PetscSynchronizedFlush(PETSC_COMM_WORLD);
      */
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  int          ierr,mbs=mat->mbs,bs=mat->bs;
  PetscScalar  mone=-1.0,*x,*b,*ptr,zero=0.0;
  Vec          bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
  if (bs > 1)
    SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  int          ierr;
  PetscScalar  mone=-1.0;
  Vec          lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
  if (mat->bs > 1)
    SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
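
/*
   Usage sketch (not part of the library): how a caller might drive the local
   symmetric relaxation implemented above through the public MatRelax()
   interface. The matrix A, vectors b and x, and the iteration counts its and
   lits are hypothetical and assumed to be created elsewhere.

     ierr = MatRelax(A,b,1.0,
                     (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                     0.0,its,lits,x);CHKERRQ(ierr);

   Any other MatSORType combination currently raises PETSC_ERR_SUP, as the
   else branches above show.
*/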