1 /*$Id: mpisbaij.c,v 1.17 2000/09/06 19:45:38 hzhang Exp hzhang $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 #include "mpisbaij.h" 6 #include "src/mat/impls/sbaij/seq/sbaij.h" 7 8 extern int MatSetUpMultiply_MPISBAIJ(Mat); 9 extern int DisAssemble_MPISBAIJ(Mat); 10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17 extern int MatPrintHelp_SeqSBAIJ(Mat); 18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19 20 /* UGLY, ugly, ugly 21 When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24 converts the entries into single precision and then calls ..._MatScalar() to put them 25 into the single precision data structures. 
*/
#if defined(PETSC_USE_MAT_SINGLE)
/* Matrix entries stored in single precision: the ..._MatScalar() variants take
   values already converted to MatScalar. */
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
/* MatScalar == Scalar: the ..._MatScalar() names are plain aliases */
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
/*
   MatStoreValues_MPISBAIJ - Saves a copy of the current numerical values of the
   matrix by delegating to MatStoreValues() on both local blocks: the diagonal
   block A and the off-diagonal block B.
*/
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
/*
   MatRetrieveValues_MPISBAIJ - Restores the numerical values previously saved
   with MatStoreValues_MPISBAIJ() into both local blocks.
*/
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.
This is done in a non scable way since the 75 length of colmap equals the global matrix length. 76 */ 77 #undef __FUNC__ 78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79 static int CreateColmap_MPISBAIJ_Private(Mat mat) 80 { 81 PetscFunctionBegin; 82 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 83 PetscFunctionReturn(0); 84 } 85 86 #define CHUNKSIZE 10 87 88 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 89 { \ 90 \ 91 brow = row/bs; \ 92 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 93 rmax = aimax[brow]; nrow = ailen[brow]; \ 94 bcol = col/bs; \ 95 ridx = row % bs; cidx = col % bs; \ 96 low = 0; high = nrow; \ 97 while (high-low > 3) { \ 98 t = (low+high)/2; \ 99 if (rp[t] > bcol) high = t; \ 100 else low = t; \ 101 } \ 102 for (_i=low; _i<high; _i++) { \ 103 if (rp[_i] > bcol) break; \ 104 if (rp[_i] == bcol) { \ 105 bap = ap + bs2*_i + bs*cidx + ridx; \ 106 if (addv == ADD_VALUES) *bap += value; \ 107 else *bap = value; \ 108 goto a_noinsert; \ 109 } \ 110 } \ 111 if (a->nonew == 1) goto a_noinsert; \ 112 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 113 if (nrow >= rmax) { \ 114 /* there is no extra room in row, therefore enlarge */ \ 115 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 116 MatScalar *new_a; \ 117 \ 118 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 119 \ 120 /* malloc new storage space */ \ 121 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 122 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 123 new_j = (int*)(new_a + bs2*new_nz); \ 124 new_i = new_j + new_nz; \ 125 \ 126 /* copy over old data into new slots */ \ 127 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 128 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 129 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 
\ 130 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 131 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 132 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 133 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 134 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 135 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 136 /* free up old matrix storage */ \ 137 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 138 if (!a->singlemalloc) { \ 139 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 140 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 141 } \ 142 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 143 a->singlemalloc = PETSC_TRUE; \ 144 \ 145 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 146 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 147 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 148 a->s_maxnz += bs2*CHUNKSIZE; \ 149 a->reallocs++; \ 150 a->s_nz++; \ 151 } \ 152 N = nrow++ - 1; \ 153 /* shift up all the later entries in this row */ \ 154 for (ii=N; ii>=_i; ii--) { \ 155 rp[ii+1] = rp[ii]; \ 156 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 157 } \ 158 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 159 rp[_i] = bcol; \ 160 ap[bs2*_i + bs*cidx + ridx] = value; \ 161 a_noinsert:; \ 162 ailen[brow] = nrow; \ 163 } 164 #ifndef MatSetValues_SeqBAIJ_B_Private 165 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 166 { \ 167 brow = row/bs; \ 168 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 169 rmax = bimax[brow]; nrow = bilen[brow]; \ 170 bcol = col/bs; \ 171 ridx = row % bs; cidx = col % bs; \ 172 low = 0; high = nrow; \ 173 while (high-low > 3) { \ 174 t = (low+high)/2; \ 175 if (rp[t] > bcol) high = t; \ 176 else low = t; \ 177 } \ 178 for (_i=low; _i<high; _i++) { \ 179 if (rp[_i] > bcol) break; \ 180 if 
(rp[_i] == bcol) { \ 181 bap = ap + bs2*_i + bs*cidx + ridx; \ 182 if (addv == ADD_VALUES) *bap += value; \ 183 else *bap = value; \ 184 goto b_noinsert; \ 185 } \ 186 } \ 187 if (b->nonew == 1) goto b_noinsert; \ 188 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 189 if (nrow >= rmax) { \ 190 /* there is no extra room in row, therefore enlarge */ \ 191 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 192 MatScalar *new_a; \ 193 \ 194 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 195 \ 196 /* malloc new storage space */ \ 197 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 198 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 199 new_j = (int*)(new_a + bs2*new_nz); \ 200 new_i = new_j + new_nz; \ 201 \ 202 /* copy over old data into new slots */ \ 203 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 204 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 205 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 206 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 207 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 208 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 209 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 210 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 211 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 212 /* free up old matrix storage */ \ 213 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 214 if (!b->singlemalloc) { \ 215 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 216 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 217 } \ 218 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 219 b->singlemalloc = PETSC_TRUE; \ 220 \ 221 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 222 rmax = bimax[brow] = bimax[brow] + 
CHUNKSIZE; \ 223 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 224 b->maxnz += bs2*CHUNKSIZE; \ 225 b->reallocs++; \ 226 b->nz++; \ 227 } \ 228 N = nrow++ - 1; \ 229 /* shift up all the later entries in this row */ \ 230 for (ii=N; ii>=_i; ii--) { \ 231 rp[ii+1] = rp[ii]; \ 232 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 233 } \ 234 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 235 rp[_i] = bcol; \ 236 ap[bs2*_i + bs*cidx + ridx] = value; \ 237 b_noinsert:; \ 238 bilen[brow] = nrow; \ 239 } 240 #endif 241 242 #if defined(PETSC_USE_MAT_SINGLE) 243 #undef __FUNC__ 244 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 245 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 246 { 247 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 248 int ierr,i,N = m*n; 249 MatScalar *vsingle; 250 251 PetscFunctionBegin; 252 if (N > b->setvalueslen) { 253 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 254 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 255 b->setvalueslen = N; 256 } 257 vsingle = b->setvaluescopy; 258 259 for (i=0; i<N; i++) { 260 vsingle[i] = v[i]; 261 } 262 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNC__ 267 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 268 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 269 { 270 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 271 int ierr,i,N = m*n*b->bs2; 272 MatScalar *vsingle; 273 274 PetscFunctionBegin; 275 if (N > b->setvalueslen) { 276 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 277 b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 278 
b->setvalueslen = N; 279 } 280 vsingle = b->setvaluescopy; 281 for (i=0; i<N; i++) { 282 vsingle[i] = v[i]; 283 } 284 ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNC__ 289 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 290 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 291 { 292 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 293 int ierr,i,N = m*n; 294 MatScalar *vsingle; 295 296 PetscFunctionBegin; 297 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 303 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 304 { 305 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 306 int ierr,i,N = m*n*b->bs2; 307 MatScalar *vsingle; 308 309 PetscFunctionBegin; 310 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 311 PetscFunctionReturn(0); 312 } 313 #endif 314 315 /* Only add/insert a(i,j) with i<=j (blocks). 316 Any a(i,j) with i>j input by user is ingored. 
*/
#undef __FUNC__
/* NOTE(review): the string below says MatSetValues_MPIBAIJ -- looks like a stale
   copy-paste from the BAIJ source; kept byte-identical (it is a runtime string) */
#define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPISBAIJ_MatScalar - inserts/adds point entries (im x in) into
   the parallel SBAIJ matrix.  Locally-owned entries go straight into the
   diagonal block A or off-diagonal block B via the _Private macros; entries
   for other processors are stashed for communication at assembly time.
   Lower-triangular blocks (block-row > block-col) are silently ignored,
   matching the symmetric storage convention.
*/
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  int          ierr,i,j,row,col;
  int          roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc, *in_loc=0;
  MatScalar    *v_loc=0;

  PetscFunctionBegin;

  /* scratch arrays for filtering the row before stashing off-processor data */
  if(!baij->donotstash){
    in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc);
    v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;          /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after a first assembly, global columns must be translated to
               local B columns through the colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            /* a new off-diagonal block forces disassembly before insertion */
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        /* keep only upper-triangular entries, then stash the filtered row */
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if(!baij->donotstash){
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
int
MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  /* Blocked insertion in MatScalar precision: not implemented for SBAIJ yet. */
  PetscFunctionBegin;
  SETERRQ(1,1,"Function not yet written for SBAIJ format");
  PetscFunctionReturn(0);
}

/* multiplicative hash on the golden ratio; tmp is caller scratch */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
/* Hash-table insertion in MatScalar precision: not implemented for SBAIJ yet. */
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,1,"Function not yet written for SBAIJ format");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
/* Blocked hash-table insertion in MatScalar precision: not implemented yet. */
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,1,"Function not yet written for SBAIJ format");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
/*
   MatGetValues_MPISBAIJ - retrieves the point entries (idxm x idxn) into v.
   Only rows owned by this processor are supported; off-diagonal columns are
   located through the colmap/garray and absent entries read back as 0.0.
*/
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not present in B: the symmetric matrix entry is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
/*
   MatNorm_MPISBAIJ - computes a matrix norm.  Only NORM_FROBENIUS is supported
   in parallel: local squared norms of A and B are summed across processors,
   with B counted twice since only the upper off-diagonal blocks are stored.
*/
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      /* NOTE(review): &sum is a PetscReal(*)[2]; harmless for MPI's void* but
         plain `sum` would be the conventional spelling */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 528 PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 529 530 *norm = sqrt(sum[0] + 2*sum[1]); 531 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 532 } else { 533 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 534 } 535 } 536 PetscFunctionReturn(0); 537 } 538 539 /* 540 Creates the hash table, and sets the table 541 This table is created only once. 542 If new entried need to be added to the matrix 543 then the hash table has to be destroyed and 544 recreated. 545 */ 546 #undef __FUNC__ 547 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 548 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 549 { 550 PetscFunctionBegin; 551 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 552 PetscFunctionReturn(0); 553 } 554 555 #undef __FUNC__ 556 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 557 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 558 { 559 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 560 int ierr,nstash,reallocs; 561 InsertMode addv; 562 563 PetscFunctionBegin; 564 if (baij->donotstash) { 565 PetscFunctionReturn(0); 566 } 567 568 /* make sure all processors are either in INSERTMODE or ADDMODE */ 569 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 570 if (addv == (ADD_VALUES|INSERT_VALUES)) { 571 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 572 } 573 mat->insertmode = addv; /* in case this processor had no cache */ 574 575 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 576 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 577 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 578 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 579 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 580 PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 581 PetscFunctionReturn(0); 582 } 583 584 #undef __FUNC__ 585 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 586 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 587 { 588 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 589 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 590 Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 591 int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 592 int *row,*col,other_disassembled; 593 PetscTruth r1,r2,r3; 594 MatScalar *val; 595 InsertMode addv = mat->insertmode; 596 int rank; 597 598 PetscFunctionBegin; 599 /* remove 2 line below later */ 600 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 601 602 if (!baij->donotstash) { 603 while (1) { 604 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 605 /* 606 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 607 PetscSynchronizedFlush(PETSC_COMM_WORLD); 608 */ 609 if (!flg) break; 610 611 for (i=0; i<n;) { 612 /* Now identify the consecutive vals belonging to the same row */ 613 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 614 if (j < n) ncols = j-i; 615 else ncols = n-i; 616 /* Now assemble all these values with a single function call */ 617 ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 618 i = j; 619 } 620 } 621 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 622 /* Now process the block-stash. 
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics (debug builds only) */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* switch to hash-table based insertion once the final structure is known */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - viewer backend shared by the ASCII,
   draw and socket paths.  Info formats print per-processor statistics; other
   formats gather the whole matrix onto processor 0 and view it there.
*/
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = 
ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* processor 0 receives the full M x N matrix; others contribute 0 rows */
    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block cols to global */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
/*
   MatView_MPISBAIJ - dispatches viewing to the shared backend for the viewer
   types it supports (ASCII, draw, socket, binary); errors on anything else.
*/
int MatView_MPISBAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
/*
   MatDestroy_MPISBAIJ - releases everything owned by the parallel SBAIJ
   matrix: mappings, maps, stashes, the A and B blocks, the column map,
   scatter context and scratch arrays, then the header itself.
*/
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = 
PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 850 #else 851 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 852 #endif 853 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 854 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 855 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 856 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 857 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 858 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 859 #if defined(PETSC_USE_MAT_SINGLE) 860 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 861 #endif 862 ierr = PetscFree(baij);CHKERRQ(ierr); 863 PLogObjectDestroy(mat); 864 PetscHeaderDestroy(mat); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNC__ 869 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 870 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 871 { 872 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 873 int ierr,nt; 874 875 PetscFunctionBegin; 876 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 877 if (nt != a->n) { 878 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 879 } 880 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 881 if (nt != a->m) { 882 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 883 } 884 885 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 886 /* do diagonal part */ 887 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 888 /* do supperdiagonal part */ 889 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 890 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 891 /* do subdiagonal part */ 892 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 893 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 894 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 
895 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNC__ 900 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 901 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 902 { 903 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 904 int ierr; 905 906 PetscFunctionBegin; 907 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 908 /* do diagonal part */ 909 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 910 /* do supperdiagonal part */ 911 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 912 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 913 914 /* do subdiagonal part */ 915 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 916 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 917 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 918 919 PetscFunctionReturn(0); 920 } 921 922 #undef __FUNC__ 923 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 924 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 925 { 926 PetscFunctionBegin; 927 SETERRQ(1,1,"Matrix is symmetric. Call MatMult()."); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNC__ 932 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 933 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 934 { 935 PetscFunctionBegin; 936 SETERRQ(1,1,"Matrix is symmetric. 
Call MatMultAdd()."); 937 PetscFunctionReturn(0); 938 } 939 940 /* 941 This only works correctly for square matrices where the subblock A->A is the 942 diagonal block 943 */ 944 #undef __FUNC__ 945 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 946 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 947 { 948 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 949 int ierr; 950 951 PetscFunctionBegin; 952 /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 953 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNC__ 958 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 959 int MatScale_MPISBAIJ(Scalar *aa,Mat A) 960 { 961 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 962 int ierr; 963 964 PetscFunctionBegin; 965 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 966 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 #undef __FUNC__ 971 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 972 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 973 { 974 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 975 976 PetscFunctionBegin; 977 if (m) *m = mat->M; 978 if (n) *n = mat->N; 979 PetscFunctionReturn(0); 980 } 981 982 #undef __FUNC__ 983 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 984 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 985 { 986 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 987 988 PetscFunctionBegin; 989 *m = mat->m; *n = mat->n; 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 995 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 996 { 997 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 998 999 PetscFunctionBegin; 1000 if (m) *m = mat->rstart*mat->bs; 1001 if (n) *n = mat->rend*mat->bs; 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNC__ 1006 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1007 int 
MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1008 { 1009 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1010 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1011 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1012 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1013 int *cmap,*idx_p,cstart = mat->cstart; 1014 1015 PetscFunctionBegin; 1016 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1017 mat->getrowactive = PETSC_TRUE; 1018 1019 if (!mat->rowvalues && (idx || v)) { 1020 /* 1021 allocate enough space to hold information from the longest row. 1022 */ 1023 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1024 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1025 int max = 1,mbs = mat->mbs,tmp; 1026 for (i=0; i<mbs; i++) { 1027 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1028 if (max < tmp) { max = tmp; } 1029 } 1030 mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1031 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1032 } 1033 1034 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1035 lrow = row - brstart; /* local row index */ 1036 1037 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1038 if (!v) {pvA = 0; pvB = 0;} 1039 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1040 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1041 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1042 nztot = nzA + nzB; 1043 1044 cmap = mat->garray; 1045 if (v || idx) { 1046 if (nztot) { 1047 /* Sort by increasing column numbers, assuming A and B already sorted */ 1048 int imark = -1; 1049 if (v) { 1050 *v = v_p = mat->rowvalues; 1051 for (i=0; i<nzB; i++) { 1052 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1053 else break; 1054 } 1055 imark = i; 1056 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1057 for (i=imark; i<nzB; 
i++) v_p[nzA+i] = vworkB[i]; 1058 } 1059 if (idx) { 1060 *idx = idx_p = mat->rowindices; 1061 if (imark > -1) { 1062 for (i=0; i<imark; i++) { 1063 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1064 } 1065 } else { 1066 for (i=0; i<nzB; i++) { 1067 if (cmap[cworkB[i]/bs] < cstart) 1068 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1069 else break; 1070 } 1071 imark = i; 1072 } 1073 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1074 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1075 } 1076 } else { 1077 if (idx) *idx = 0; 1078 if (v) *v = 0; 1079 } 1080 } 1081 *nz = nztot; 1082 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1083 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNC__ 1088 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1089 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1090 { 1091 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1092 1093 PetscFunctionBegin; 1094 if (baij->getrowactive == PETSC_FALSE) { 1095 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1096 } 1097 baij->getrowactive = PETSC_FALSE; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNC__ 1102 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1103 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1104 { 1105 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1106 1107 PetscFunctionBegin; 1108 *bs = baij->bs; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1114 int MatZeroEntries_MPISBAIJ(Mat A) 1115 { 1116 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1117 int ierr; 1118 1119 PetscFunctionBegin; 1120 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1121 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ /*<a 
name=""></a>*/"MatGetInfo_MPISBAIJ" 1127 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1130 Mat A = a->A,B = a->B; 1131 int ierr; 1132 PetscReal isend[5],irecv[5]; 1133 1134 PetscFunctionBegin; 1135 info->block_size = (double)a->bs; 1136 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1137 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1138 isend[3] = info->memory; isend[4] = info->mallocs; 1139 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1140 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1141 isend[3] += info->memory; isend[4] += info->mallocs; 1142 if (flag == MAT_LOCAL) { 1143 info->nz_used = isend[0]; 1144 info->nz_allocated = isend[1]; 1145 info->nz_unneeded = isend[2]; 1146 info->memory = isend[3]; 1147 info->mallocs = isend[4]; 1148 } else if (flag == MAT_GLOBAL_MAX) { 1149 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1150 info->nz_used = irecv[0]; 1151 info->nz_allocated = irecv[1]; 1152 info->nz_unneeded = irecv[2]; 1153 info->memory = irecv[3]; 1154 info->mallocs = irecv[4]; 1155 } else if (flag == MAT_GLOBAL_SUM) { 1156 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1157 info->nz_used = irecv[0]; 1158 info->nz_allocated = irecv[1]; 1159 info->nz_unneeded = irecv[2]; 1160 info->memory = irecv[3]; 1161 info->mallocs = irecv[4]; 1162 } else { 1163 SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1164 } 1165 info->rows_global = (double)a->M; 1166 info->columns_global = (double)a->N; 1167 info->rows_local = (double)a->m; 1168 info->columns_local = (double)a->N; 1169 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1170 info->fill_ratio_needed = 0; 1171 info->factor_mallocs = 0; 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 
1177 int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1178 { 1179 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1180 int ierr; 1181 1182 PetscFunctionBegin; 1183 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1184 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1185 op == MAT_COLUMNS_UNSORTED || 1186 op == MAT_COLUMNS_SORTED || 1187 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1188 op == MAT_KEEP_ZEROED_ROWS || 1189 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1190 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1191 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1192 } else if (op == MAT_ROW_ORIENTED) { 1193 a->roworiented = PETSC_TRUE; 1194 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1195 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1196 } else if (op == MAT_ROWS_SORTED || 1197 op == MAT_ROWS_UNSORTED || 1198 op == MAT_SYMMETRIC || 1199 op == MAT_STRUCTURALLY_SYMMETRIC || 1200 op == MAT_YES_NEW_DIAGONALS || 1201 op == MAT_USE_HASH_TABLE) { 1202 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1203 } else if (op == MAT_COLUMN_ORIENTED) { 1204 a->roworiented = PETSC_FALSE; 1205 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1206 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1207 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1208 a->donotstash = PETSC_TRUE; 1209 } else if (op == MAT_NO_NEW_DIAGONALS) { 1210 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1211 } else if (op == MAT_USE_HASH_TABLE) { 1212 a->ht_flag = PETSC_TRUE; 1213 } else { 1214 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1221 int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1222 { 1223 PetscFunctionBegin; 1224 SETERRQ(1,1,"Matrix is symmetric. 
MatTranspose() should not be called"); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNC__ 1229 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1230 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1231 { 1232 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1233 Mat a = baij->A,b = baij->B; 1234 int ierr,s1,s2,s3; 1235 1236 PetscFunctionBegin; 1237 if (ll != rr) { 1238 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1239 } 1240 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1241 if (rr) { 1242 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1243 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1244 /* Overlap communication with computation. */ 1245 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1246 /*} if (ll) { */ 1247 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1248 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1249 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1250 /* } */ 1251 /* scale the diagonal block */ 1252 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1253 1254 /* if (rr) { */ 1255 /* Do a scatter end and then right scale the off-diagonal block */ 1256 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1257 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1258 } 1259 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNC__ 1264 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1265 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1266 { 1267 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1268 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1269 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1270 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1271 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1272 int 
*lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1273 MPI_Comm comm = A->comm; 1274 MPI_Request *send_waits,*recv_waits; 1275 MPI_Status recv_status,*send_status; 1276 IS istmp; 1277 1278 PetscFunctionBegin; 1279 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1280 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1281 1282 /* first count number of contributors to each processor */ 1283 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1284 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1285 procs = nprocs + size; 1286 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1287 for (i=0; i<N; i++) { 1288 idx = rows[i]; 1289 found = 0; 1290 for (j=0; j<size; j++) { 1291 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1292 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1293 } 1294 } 1295 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1296 } 1297 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1298 1299 /* inform other processors of number of messages and max length*/ 1300 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1301 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1302 nmax = work[rank]; 1303 nrecvs = work[size+rank]; 1304 ierr = PetscFree(work);CHKERRQ(ierr); 1305 1306 /* post receives: */ 1307 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1308 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1309 for (i=0; i<nrecvs; i++) { 1310 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1311 } 1312 1313 /* do sends: 1314 1) starts[i] gives the starting index in svalues for stuff going to 1315 the ith processor 1316 */ 1317 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1318 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1319 starts = 
(int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1320 starts[0] = 0; 1321 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1322 for (i=0; i<N; i++) { 1323 svalues[starts[owner[i]]++] = rows[i]; 1324 } 1325 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1326 1327 starts[0] = 0; 1328 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1329 count = 0; 1330 for (i=0; i<size; i++) { 1331 if (procs[i]) { 1332 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1333 } 1334 } 1335 ierr = PetscFree(starts);CHKERRQ(ierr); 1336 1337 base = owners[rank]*bs; 1338 1339 /* wait on receives */ 1340 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1341 source = lens + nrecvs; 1342 count = nrecvs; slen = 0; 1343 while (count) { 1344 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1345 /* unpack receives into our local space */ 1346 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1347 source[imdex] = recv_status.MPI_SOURCE; 1348 lens[imdex] = n; 1349 slen += n; 1350 count--; 1351 } 1352 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1353 1354 /* move the data into the send scatter */ 1355 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1356 count = 0; 1357 for (i=0; i<nrecvs; i++) { 1358 values = rvalues + i*nmax; 1359 for (j=0; j<lens[i]; j++) { 1360 lrows[count++] = values[j] - base; 1361 } 1362 } 1363 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1364 ierr = PetscFree(lens);CHKERRQ(ierr); 1365 ierr = PetscFree(owner);CHKERRQ(ierr); 1366 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1367 1368 /* actually zap the local rows */ 1369 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1370 PLogObjectParent(A,istmp); 1371 1372 /* 1373 Zero the required rows. 
If the "diagonal block" of the matrix 1374 is square and the user wishes to set the diagonal we use seperate 1375 code so that MatSetValues() is not called for each diagonal allocating 1376 new memory, thus calling lots of mallocs and slowing things down. 1377 1378 Contributed by: Mathew Knepley 1379 */ 1380 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1381 ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1382 if (diag && (l->A->M == l->A->N)) { 1383 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1384 } else if (diag) { 1385 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1386 if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1387 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1388 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1389 } 1390 for (i=0; i<slen; i++) { 1391 row = lrows[i] + rstart_bs; 1392 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1393 } 1394 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1396 } else { 1397 ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1398 } 1399 1400 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1401 ierr = PetscFree(lrows);CHKERRQ(ierr); 1402 1403 /* wait on sends */ 1404 if (nsends) { 1405 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1406 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1407 ierr = PetscFree(send_status);CHKERRQ(ierr); 1408 } 1409 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1410 ierr = PetscFree(svalues);CHKERRQ(ierr); 1411 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNC__ 1416 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1417 int MatPrintHelp_MPISBAIJ(Mat A) 1418 { 1419 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1420 MPI_Comm comm = A->comm; 1421 static int 
called = 0; 1422 int ierr; 1423 1424 PetscFunctionBegin; 1425 if (!a->rank) { 1426 ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1427 } 1428 if (called) {PetscFunctionReturn(0);} else called = 1; 1429 ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1430 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNC__ 1435 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1436 int MatSetUnfactored_MPISBAIJ(Mat A) 1437 { 1438 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1447 1448 #undef __FUNC__ 1449 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1450 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1451 { 1452 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1453 Mat a,b,c,d; 1454 PetscTruth flg; 1455 int ierr; 1456 1457 PetscFunctionBegin; 1458 if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1459 a = matA->A; b = matA->B; 1460 c = matB->A; d = matB->B; 1461 1462 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1463 if (flg == PETSC_TRUE) { 1464 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1465 } 1466 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 /* -------------------------------------------------------------------*/ 1471 static struct _MatOps MatOps_Values = { 1472 MatSetValues_MPISBAIJ, 1473 MatGetRow_MPISBAIJ, 1474 MatRestoreRow_MPISBAIJ, 1475 MatMult_MPISBAIJ, 1476 MatMultAdd_MPISBAIJ, 1477 MatMultTranspose_MPISBAIJ, 1478 MatMultTransposeAdd_MPISBAIJ, 1479 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 
MatTranspose_MPISBAIJ, 1487 MatGetInfo_MPISBAIJ, 1488 MatEqual_MPISBAIJ, 1489 MatGetDiagonal_MPISBAIJ, 1490 MatDiagonalScale_MPISBAIJ, 1491 MatNorm_MPISBAIJ, 1492 MatAssemblyBegin_MPISBAIJ, 1493 MatAssemblyEnd_MPISBAIJ, 1494 0, 1495 MatSetOption_MPISBAIJ, 1496 MatZeroEntries_MPISBAIJ, 1497 MatZeroRows_MPISBAIJ, 1498 0, 1499 0, 1500 0, 1501 0, 1502 MatGetSize_MPISBAIJ, 1503 MatGetLocalSize_MPISBAIJ, 1504 MatGetOwnershipRange_MPISBAIJ, 1505 0, 1506 0, 1507 0, 1508 0, 1509 MatDuplicate_MPISBAIJ, 1510 0, 1511 0, 1512 0, 1513 0, 1514 0, 1515 MatGetSubMatrices_MPISBAIJ, 1516 MatIncreaseOverlap_MPISBAIJ, 1517 MatGetValues_MPISBAIJ, 1518 0, 1519 MatPrintHelp_MPISBAIJ, 1520 MatScale_MPISBAIJ, 1521 0, 1522 0, 1523 0, 1524 MatGetBlockSize_MPISBAIJ, 1525 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 0, 1531 MatSetUnfactored_MPISBAIJ, 1532 0, 1533 MatSetValuesBlocked_MPISBAIJ, 1534 0, 1535 0, 1536 0, 1537 MatGetMaps_Petsc}; 1538 1539 1540 EXTERN_C_BEGIN 1541 #undef __FUNC__ 1542 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 1543 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1544 { 1545 PetscFunctionBegin; 1546 *a = ((Mat_MPISBAIJ *)A->data)->A; 1547 *iscopy = PETSC_FALSE; 1548 PetscFunctionReturn(0); 1549 } 1550 EXTERN_C_END 1551 1552 #undef __FUNC__ 1553 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 1554 /*@C 1555 MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 1556 (block compressed row). For good matrix assembly performance 1557 the user should preallocate the matrix storage by setting the parameters 1558 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1559 performance can be increased by more than a factor of 50. 1560 1561 Collective on MPI_Comm 1562 1563 Input Parameters: 1564 + comm - MPI communicator 1565 . bs - size of blockk 1566 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1567 This value should be the same as the local size used in creating the 1568 y vector for the matrix-vector product y = Ax. 1569 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1570 This value should be the same as the local size used in creating the 1571 x vector for the matrix-vector product y = Ax. 1572 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1573 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1574 . d_nz - number of block nonzeros per block row in diagonal portion of local 1575 submatrix (same for all local rows) 1576 . d_nnz - array containing the number of block nonzeros in the various block rows 1577 of the in diagonal portion of the local (possibly different for each block 1578 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 1579 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1580 submatrix (same for all local rows). 1581 - o_nnz - array containing the number of nonzeros in the various block rows of the 1582 off-diagonal portion of the local submatrix (possibly different for 1583 each block row) or PETSC_NULL. 1584 1585 Output Parameter: 1586 . A - the matrix 1587 1588 Options Database Keys: 1589 . -mat_no_unroll - uses code that does not unroll the loops in the 1590 block calculations (much slower) 1591 . -mat_block_size - size of the blocks to use 1592 . -mat_mpi - use the parallel matrix data structures even on one processor 1593 (defaults to using SeqBAIJ format on one processor) 1594 1595 Notes: 1596 The user MUST specify either the local or global matrix dimensions 1597 (possibly both). 1598 1599 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1600 than it must be used on all processors that share the object for that argument. 
1601 1602 Storage Information: 1603 For a square global matrix we define each processor's diagonal portion 1604 to be its local rows and the corresponding columns (a square submatrix); 1605 each processor's off-diagonal portion encompasses the remainder of the 1606 local matrix (a rectangular submatrix). 1607 1608 The user can specify preallocated storage for the diagonal part of 1609 the local submatrix with either d_nz or d_nnz (not both). Set 1610 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1611 memory allocation. Likewise, specify preallocated storage for the 1612 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1613 1614 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1615 the figure below we depict these three local rows and all columns (0-11). 1616 1617 .vb 1618 0 1 2 3 4 5 6 7 8 9 10 11 1619 ------------------- 1620 row 3 | o o o d d d o o o o o o 1621 row 4 | o o o d d d o o o o o o 1622 row 5 | o o o d d d o o o o o o 1623 ------------------- 1624 .ve 1625 1626 Thus, any entries in the d locations are stored in the d (diagonal) 1627 submatrix, and any entries in the o locations are stored in the 1628 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1629 stored simply in the MATSEQBAIJ format for compressed row storage. 1630 1631 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1632 and o_nz should indicate the number of block nonzeros per row in the o matrix. 1633 In general, for PDE problems in which most nonzeros are near the diagonal, 1634 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1635 or you will get TERRIBLE performance; see the users' manual chapter on 1636 matrices. 
1637 1638 Level: intermediate 1639 1640 .keywords: matrix, block, aij, compressed row, sparse, parallel 1641 1642 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1643 @*/ 1644 1645 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1646 { 1647 Mat B; 1648 Mat_MPISBAIJ *b; 1649 int ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size; 1650 PetscTruth flag1,flag2,flg; 1651 1652 PetscFunctionBegin; 1653 if (M != N || m != n){ /* N and n are not used after this */ 1654 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 1655 } 1656 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1657 1658 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 1659 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1660 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1661 if (d_nnz) { 1662 for (i=0; i<m/bs; i++) { 1663 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 1664 } 1665 } 1666 if (o_nnz) { 1667 for (i=0; i<m/bs; i++) { 1668 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 1669 } 1670 } 1671 1672 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1673 ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 1674 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1675 if (!flag1 && !flag2 && size == 1) { 1676 if (M == PETSC_DECIDE) M = m; 1677 ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 *A = 0; 1682 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 1683 PLogObjectCreate(B); 1684 B->data = (void*)(b = 
PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 1685 ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 1686 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1687 1688 B->ops->destroy = MatDestroy_MPISBAIJ; 1689 B->ops->view = MatView_MPISBAIJ; 1690 B->mapping = 0; 1691 B->factor = 0; 1692 B->assembled = PETSC_FALSE; 1693 1694 B->insertmode = NOT_SET_VALUES; 1695 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1696 ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 1697 1698 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 1699 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1700 } 1701 if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 1702 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 1703 } 1704 if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 1705 1706 if (M == PETSC_DECIDE) { 1707 work[0] = m; mbs = m/bs; 1708 ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1709 M = sum[0]; Mbs = M/bs; 1710 } else { /* M is specified */ 1711 Mbs = M/bs; 1712 if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 1713 mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 1714 m = mbs*bs; 1715 } 1716 1717 if (mbs*bs != m) { 1718 SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 1719 } 1720 1721 b->m = m; B->m = m; 1722 b->n = m; B->n = m; 1723 b->N = M; B->N = M; 1724 b->M = M; 1725 B->M = M; 1726 b->bs = bs; 1727 b->bs2 = bs*bs; 1728 b->mbs = mbs; 1729 b->nbs = mbs; 1730 b->Mbs = Mbs; 1731 b->Nbs = Mbs; 1732 1733 /* the information in the maps duplicates the information computed below, eventually 1734 we should remove the duplicate information that is not contained in the maps */ 1735 ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 1736 ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 1737 1738 /* build local table of row and column ownerships */ 1739 b->rowners = 
(int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1740 PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1741 b->cowners = b->rowners + b->size + 2; 1742 b->rowners_bs = b->cowners + b->size + 2; 1743 ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1744 b->rowners[0] = 0; 1745 for (i=2; i<=b->size; i++) { 1746 b->rowners[i] += b->rowners[i-1]; 1747 } 1748 for (i=0; i<=b->size; i++) { 1749 b->rowners_bs[i] = b->rowners[i]*bs; 1750 } 1751 b->rstart = b->rowners[b->rank]; 1752 b->rend = b->rowners[b->rank+1]; 1753 b->rstart_bs = b->rstart * bs; 1754 b->rend_bs = b->rend * bs; 1755 1756 b->cstart = b->rstart; 1757 b->cend = b->rend; 1758 b->cstart_bs = b->cstart * bs; 1759 b->cend_bs = b->cend * bs; 1760 1761 1762 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1763 ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1764 PLogObjectParent(B,b->A); 1765 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1766 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1767 PLogObjectParent(B,b->B); 1768 1769 /* build cache for off array entries formed */ 1770 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1771 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 1772 b->donotstash = PETSC_FALSE; 1773 b->colmap = PETSC_NULL; 1774 b->garray = PETSC_NULL; 1775 b->roworiented = PETSC_TRUE; 1776 1777 #if defined(PEYSC_USE_MAT_SINGLE) 1778 /* stuff for MatSetValues_XXX in single precision */ 1779 b->lensetvalues = 0; 1780 b->setvaluescopy = PETSC_NULL; 1781 #endif 1782 1783 /* stuff used in block assembly */ 1784 b->barray = 0; 1785 1786 /* stuff used for matrix vector multiply */ 1787 b->lvec = 0; 1788 b->Mvctx = 0; 1789 1790 /* stuff for MatGetRow() */ 1791 b->rowindices = 0; 1792 b->rowvalues = 0; 1793 b->getrowactive = PETSC_FALSE; 1794 1795 /* hash table stuff */ 1796 b->ht = 0; 1797 b->hd = 0; 1798 b->ht_size = 0; 1799 
b->ht_flag = PETSC_FALSE; 1800 b->ht_fact = 0; 1801 b->ht_total_ct = 0; 1802 b->ht_insert_ct = 0; 1803 1804 *A = B; 1805 ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 1806 if (flg) { 1807 double fact = 1.39; 1808 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 1809 ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 1810 if (fact <= 1.0) fact = 1.39; 1811 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1812 PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 1813 } 1814 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1815 "MatStoreValues_MPISBAIJ", 1816 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1817 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1818 "MatRetrieveValues_MPISBAIJ", 1819 MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1820 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1821 "MatGetDiagonalBlock_MPISBAIJ", 1822 MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 1827 #undef __FUNC__ 1828 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 1829 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1830 { 1831 Mat mat; 1832 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 1833 int ierr,len=0; 1834 PetscTruth flg; 1835 1836 PetscFunctionBegin; 1837 *newmat = 0; 1838 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 1839 PLogObjectCreate(mat); 1840 mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 1841 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1842 mat->ops->destroy = MatDestroy_MPISBAIJ; 1843 mat->ops->view = MatView_MPISBAIJ; 1844 mat->factor = matin->factor; 1845 mat->assembled = PETSC_TRUE; 1846 mat->insertmode = NOT_SET_VALUES; 1847 1848 a->m = mat->m = oldmat->m; 1849 a->n = 
mat->n = oldmat->n; 1850 a->M = mat->M = oldmat->M; 1851 a->N = mat->N = oldmat->N; 1852 1853 a->bs = oldmat->bs; 1854 a->bs2 = oldmat->bs2; 1855 a->mbs = oldmat->mbs; 1856 a->nbs = oldmat->nbs; 1857 a->Mbs = oldmat->Mbs; 1858 a->Nbs = oldmat->Nbs; 1859 1860 a->rstart = oldmat->rstart; 1861 a->rend = oldmat->rend; 1862 a->cstart = oldmat->cstart; 1863 a->cend = oldmat->cend; 1864 a->size = oldmat->size; 1865 a->rank = oldmat->rank; 1866 a->donotstash = oldmat->donotstash; 1867 a->roworiented = oldmat->roworiented; 1868 a->rowindices = 0; 1869 a->rowvalues = 0; 1870 a->getrowactive = PETSC_FALSE; 1871 a->barray = 0; 1872 a->rstart_bs = oldmat->rstart_bs; 1873 a->rend_bs = oldmat->rend_bs; 1874 a->cstart_bs = oldmat->cstart_bs; 1875 a->cend_bs = oldmat->cend_bs; 1876 1877 /* hash table stuff */ 1878 a->ht = 0; 1879 a->hd = 0; 1880 a->ht_size = 0; 1881 a->ht_flag = oldmat->ht_flag; 1882 a->ht_fact = oldmat->ht_fact; 1883 a->ht_total_ct = 0; 1884 a->ht_insert_ct = 0; 1885 1886 1887 a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1888 PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 1889 a->cowners = a->rowners + a->size + 2; 1890 a->rowners_bs = a->cowners + a->size + 2; 1891 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1892 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1893 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 1894 if (oldmat->colmap) { 1895 #if defined (PETSC_USE_CTABLE) 1896 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1897 #else 1898 a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1899 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1900 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 1901 #endif 1902 } else a->colmap = 0; 1903 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 1904 
a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 1905 PLogObjectMemory(mat,len*sizeof(int)); 1906 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 1907 } else a->garray = 0; 1908 1909 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1910 PLogObjectParent(mat,a->lvec); 1911 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1912 1913 PLogObjectParent(mat,a->Mvctx); 1914 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1915 PLogObjectParent(mat,a->A); 1916 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1917 PLogObjectParent(mat,a->B); 1918 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1919 ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 1920 if (flg) { 1921 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1922 } 1923 *newmat = mat; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #include "petscsys.h" 1928 1929 #undef __FUNC__ 1930 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 1931 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 1932 { 1933 Mat A; 1934 int i,nz,ierr,j,rstart,rend,fd; 1935 Scalar *vals,*buf; 1936 MPI_Comm comm = ((PetscObject)viewer)->comm; 1937 MPI_Status status; 1938 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1939 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 1940 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 1941 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1942 int dcount,kmax,k,nzcount,tmp; 1943 1944 PetscFunctionBegin; 1945 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1946 1947 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1948 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1949 if (!rank) { 1950 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1951 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1952 if (header[0] != MAT_COOKIE) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1953 if (header[3] < 0) { 1954 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 1955 } 1956 } 1957 1958 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1959 M = header[1]; N = header[2]; 1960 1961 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1962 1963 /* 1964 This code adds extra rows to make sure the number of rows is 1965 divisible by the blocksize 1966 */ 1967 Mbs = M/bs; 1968 extra_rows = bs - M + bs*(Mbs); 1969 if (extra_rows == bs) extra_rows = 0; 1970 else Mbs++; 1971 if (extra_rows &&!rank) { 1972 PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 1973 } 1974 1975 /* determine ownership of all rows */ 1976 mbs = Mbs/size + ((Mbs % size) > rank); 1977 m = mbs * bs; 1978 rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 1979 browners = rowners + size + 1; 1980 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1981 rowners[0] = 0; 1982 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 1983 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 1984 rstart = rowners[rank]; 1985 rend = rowners[rank+1]; 1986 1987 /* distribute row lengths to all processors */ 1988 locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 1989 if (!rank) { 1990 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1991 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1992 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1993 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 1994 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 1995 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 1996 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1997 } else { 1998 ierr = 
MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 1999 } 2000 2001 if (!rank) { /* procs[0] */ 2002 /* calculate the number of nonzeros on each processor */ 2003 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2004 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2005 for (i=0; i<size; i++) { 2006 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2007 procsnz[i] += rowlengths[j]; 2008 } 2009 } 2010 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2011 2012 /* determine max buffer needed and allocate it */ 2013 maxnz = 0; 2014 for (i=0; i<size; i++) { 2015 maxnz = PetscMax(maxnz,procsnz[i]); 2016 } 2017 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2018 2019 /* read in my part of the matrix column indices */ 2020 nz = procsnz[0]; 2021 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2022 mycols = ibuf; 2023 if (size == 1) nz -= extra_rows; 2024 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2025 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2026 2027 /* read in every ones (except the last) and ship off */ 2028 for (i=1; i<size-1; i++) { 2029 nz = procsnz[i]; 2030 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2031 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2032 } 2033 /* read in the stuff for the last proc */ 2034 if (size != 1) { 2035 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2036 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2037 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2038 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2039 } 2040 ierr = PetscFree(cols);CHKERRQ(ierr); 2041 } else { /* procs[i], i>0 */ 2042 /* determine buffer space needed for message */ 2043 nz = 0; 2044 for (i=0; i<m; i++) { 2045 nz += locrowlens[i]; 2046 } 2047 ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2048 mycols = ibuf; 2049 /* receive message of column 
indices*/ 2050 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2051 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2052 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2053 } 2054 2055 /* loop over local rows, determining number of off diagonal entries */ 2056 dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2057 odlens = dlens + (rend-rstart); 2058 mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2059 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2060 masked1 = mask + Mbs; 2061 masked2 = masked1 + Mbs; 2062 rowcount = 0; nzcount = 0; 2063 for (i=0; i<mbs; i++) { 2064 dcount = 0; 2065 odcount = 0; 2066 for (j=0; j<bs; j++) { 2067 kmax = locrowlens[rowcount]; 2068 for (k=0; k<kmax; k++) { 2069 tmp = mycols[nzcount++]/bs; /* block col. index */ 2070 if (!mask[tmp]) { 2071 mask[tmp] = 1; 2072 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2073 else masked1[dcount++] = tmp; /* entry in diag portion */ 2074 } 2075 } 2076 rowcount++; 2077 } 2078 2079 dlens[i] = dcount; /* d_nzz[i] */ 2080 odlens[i] = odcount; /* o_nzz[i] */ 2081 2082 /* zero out the mask elements we set */ 2083 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2084 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2085 } 2086 2087 /* create our matrix */ 2088 ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat); 2089 CHKERRQ(ierr); 2090 A = *newmat; 2091 MatSetOption(A,MAT_COLUMNS_SORTED); 2092 2093 if (!rank) { 2094 buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2095 /* read in my part of the matrix numerical values */ 2096 nz = procsnz[0]; 2097 vals = buf; 2098 mycols = ibuf; 2099 if (size == 1) nz -= extra_rows; 2100 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2101 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2102 2103 /* insert into matrix */ 2104 
jj = rstart*bs; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2107 mycols += locrowlens[i]; 2108 vals += locrowlens[i]; 2109 jj++; 2110 } 2111 2112 /* read in other processors (except the last one) and ship out */ 2113 for (i=1; i<size-1; i++) { 2114 nz = procsnz[i]; 2115 vals = buf; 2116 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2117 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2118 } 2119 /* the last proc */ 2120 if (size != 1){ 2121 nz = procsnz[i] - extra_rows; 2122 vals = buf; 2123 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2124 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2125 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2126 } 2127 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2128 2129 } else { 2130 /* receive numeric values */ 2131 buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2132 2133 /* receive message of values*/ 2134 vals = buf; 2135 mycols = ibuf; 2136 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2137 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2138 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2139 2140 /* insert into matrix */ 2141 jj = rstart*bs; 2142 for (i=0; i<m; i++) { 2143 ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2144 mycols += locrowlens[i]; 2145 vals += locrowlens[i]; 2146 jj++; 2147 } 2148 } 2149 2150 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2151 ierr = PetscFree(buf);CHKERRQ(ierr); 2152 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2153 ierr = PetscFree(rowners);CHKERRQ(ierr); 2154 ierr = PetscFree(dlens);CHKERRQ(ierr); 2155 ierr = PetscFree(mask);CHKERRQ(ierr); 2156 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 

#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
/*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Collective on Mat

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

   Not yet implemented for the SBAIJ format; calling this routine currently
   raises an error (the BAIJ analogue is MatMPIBAIJSetHashTableFactor()).

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  /* placeholder: unconditionally errors; mat and fact are not used */
  SETERRQ(1,1,"Function not yet written for SBAIJ format");
  PetscFunctionReturn(0);
}