1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/ 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "src/vec/vecimpl.h" 5 6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN int DisAssemble_MPIBAIJ(Mat); 8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *); 11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode); 12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**); 14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**); 15 EXTERN int MatPrintHelp_SeqBAIJ(Mat); 16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*); 17 18 /* UGLY, ugly, ugly 19 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 20 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 21 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 22 converts the entries into single precision and then calls ..._MatScalar() to put them 23 into the single precision data structures. 
24 */ 25 #if defined(PETSC_USE_MAT_SINGLE) 26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31 #else 32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 33 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 35 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 37 #endif 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v) 42 { 43 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 44 int ierr,i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 int MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 int ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = 
MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 int MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 int ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 static int CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 int nbs = B->nbs,i,bs=B->bs,ierr; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr); 120 PetscLogObjectMemory(mat,baij->Nbs*sizeof(int)); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low 
= t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (int*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + 
ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (int*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 
\ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 
int ierr,i,N = m*n; 289 MatScalar *vsingle; 290 291 PetscFunctionBegin; 292 if (N > b->setvalueslen) { 293 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 294 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 295 b->setvalueslen = N; 296 } 297 vsingle = b->setvaluescopy; 298 299 for (i=0; i<N; i++) { 300 vsingle[i] = v[i]; 301 } 302 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 309 { 310 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 311 int ierr,i,N = m*n*b->bs2; 312 MatScalar *vsingle; 313 314 PetscFunctionBegin; 315 if (N > b->setvalueslen) { 316 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 317 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 318 b->setvalueslen = N; 319 } 320 vsingle = b->setvaluescopy; 321 for (i=0; i<N; i++) { 322 vsingle[i] = v[i]; 323 } 324 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 331 { 332 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 333 int ierr,i,N = m*n; 334 MatScalar *vsingle; 335 336 PetscFunctionBegin; 337 if (N > b->setvalueslen) { 338 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 339 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 340 b->setvalueslen = N; 341 } 342 vsingle = b->setvaluescopy; 343 for (i=0; i<N; i++) { 344 vsingle[i] = v[i]; 345 } 346 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 
351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 353 { 354 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 355 int ierr,i,N = m*n*b->bs2; 356 MatScalar *vsingle; 357 358 PetscFunctionBegin; 359 if (N > b->setvalueslen) { 360 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 361 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 362 b->setvalueslen = N; 363 } 364 vsingle = b->setvaluescopy; 365 for (i=0; i<N; i++) { 366 vsingle[i] = v[i]; 367 } 368 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "MatSetValues_MPIBAIJ" 375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 376 { 377 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 378 MatScalar value; 379 PetscTruth roworiented = baij->roworiented; 380 int ierr,i,j,row,col; 381 int rstart_orig=baij->rstart_bs; 382 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 383 int cend_orig=baij->cend_bs,bs=baij->bs; 384 385 /* Some Variables required in the macro */ 386 Mat A = baij->A; 387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 388 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 389 MatScalar *aa=a->a; 390 391 Mat B = baij->B; 392 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 393 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 394 MatScalar *ba=b->a; 395 396 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 397 int low,high,t,ridx,cidx,bs2=a->bs2; 398 MatScalar *ap,*bap; 399 400 PetscFunctionBegin; 401 for (i=0; i<m; i++) { 402 if (im[i] < 0) continue; 403 #if defined(PETSC_USE_BOPT_g) 404 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 405 #endif 406 if (im[i] >= rstart_orig && im[i] < rend_orig) { 407 row = im[i] - rstart_orig; 408 for (j=0; j<n; j++) { 409 if (in[j] 
>= cstart_orig && in[j] < cend_orig){ 410 col = in[j] - cstart_orig; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 413 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 414 } else if (in[j] < 0) continue; 415 #if defined(PETSC_USE_BOPT_g) 416 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 417 #endif 418 else { 419 if (mat->was_assembled) { 420 if (!baij->colmap) { 421 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 422 } 423 #if defined (PETSC_USE_CTABLE) 424 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 425 col = col - 1; 426 #else 427 col = baij->colmap[in[j]/bs] - 1; 428 #endif 429 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 430 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 431 col = in[j]; 432 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 433 B = baij->B; 434 b = (Mat_SeqBAIJ*)(B)->data; 435 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 436 ba=b->a; 437 } else col += in[j]%bs; 438 } else col = in[j]; 439 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 440 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 441 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 442 } 443 } 444 } else { 445 if (!baij->donotstash) { 446 if (roworiented) { 447 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 448 } else { 449 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 450 } 451 } 452 } 453 } 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 460 { 461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 462 MatScalar *value,*barray=baij->barray; 463 PetscTruth roworiented = baij->roworiented; 
464 int ierr,i,j,ii,jj,row,col,rstart=baij->rstart; 465 int rend=baij->rend,cstart=baij->cstart,stepval; 466 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 467 468 PetscFunctionBegin; 469 if(!barray) { 470 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 471 baij->barray = barray; 472 } 473 474 if (roworiented) { 475 stepval = (n-1)*bs; 476 } else { 477 stepval = (m-1)*bs; 478 } 479 for (i=0; i<m; i++) { 480 if (im[i] < 0) continue; 481 #if defined(PETSC_USE_BOPT_g) 482 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs); 483 #endif 484 if (im[i] >= rstart && im[i] < rend) { 485 row = im[i] - rstart; 486 for (j=0; j<n; j++) { 487 /* If NumCol = 1 then a copy is not required */ 488 if ((roworiented) && (n == 1)) { 489 barray = v + i*bs2; 490 } else if((!roworiented) && (m == 1)) { 491 barray = v + j*bs2; 492 } else { /* Here a copy is required */ 493 if (roworiented) { 494 value = v + i*(stepval+bs)*bs + j*bs; 495 } else { 496 value = v + j*(stepval+bs)*bs + i*bs; 497 } 498 for (ii=0; ii<bs; ii++,value+=stepval) { 499 for (jj=0; jj<bs; jj++) { 500 *barray++ = *value++; 501 } 502 } 503 barray -=bs2; 504 } 505 506 if (in[j] >= cstart && in[j] < cend){ 507 col = in[j] - cstart; 508 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509 } 510 else if (in[j] < 0) continue; 511 #if defined(PETSC_USE_BOPT_g) 512 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);} 513 #endif 514 else { 515 if (mat->was_assembled) { 516 if (!baij->colmap) { 517 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 518 } 519 520 #if defined(PETSC_USE_BOPT_g) 521 #if defined (PETSC_USE_CTABLE) 522 { int data; 523 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 525 } 526 #else 527 if ((baij->colmap[in[j]] - 1) % bs) 
SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 528 #endif 529 #endif 530 #if defined (PETSC_USE_CTABLE) 531 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532 col = (col - 1)/bs; 533 #else 534 col = (baij->colmap[in[j]] - 1)/bs; 535 #endif 536 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538 col = in[j]; 539 } 540 } 541 else col = in[j]; 542 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 PetscFunctionReturn(0); 556 } 557 558 #define HASH_KEY 0.6180339887 559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 564 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscTruth roworiented = baij->roworiented; 568 int ierr,i,j,row,col; 569 int rstart_orig=baij->rstart_bs; 570 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 571 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 572 PetscReal tmp; 573 MatScalar **HD = baij->hd,value; 574 #if defined(PETSC_USE_BOPT_g) 575 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 576 #endif 577 578 PetscFunctionBegin; 579 580 for (i=0; i<m; i++) { 581 #if defined(PETSC_USE_BOPT_g) 582 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 583 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 584 #endif 
585 row = im[i]; 586 if (row >= rstart_orig && row < rend_orig) { 587 for (j=0; j<n; j++) { 588 col = in[j]; 589 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 590 /* Look up into the Hash Table */ 591 key = (row/bs)*Nbs+(col/bs)+1; 592 h1 = HASH(size,key,tmp); 593 594 595 idx = h1; 596 #if defined(PETSC_USE_BOPT_g) 597 insert_ct++; 598 total_ct++; 599 if (HT[idx] != key) { 600 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 601 if (idx == size) { 602 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 603 if (idx == h1) { 604 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 605 } 606 } 607 } 608 #else 609 if (HT[idx] != key) { 610 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 611 if (idx == size) { 612 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 613 if (idx == h1) { 614 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 615 } 616 } 617 } 618 #endif 619 /* A HASH table entry is found, so insert the values at the correct address */ 620 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 621 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 622 } 623 } else { 624 if (!baij->donotstash) { 625 if (roworiented) { 626 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 627 } else { 628 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 629 } 630 } 631 } 632 } 633 #if defined(PETSC_USE_BOPT_g) 634 baij->ht_total_ct = total_ct; 635 baij->ht_insert_ct = insert_ct; 636 #endif 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 642 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 643 { 644 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 645 PetscTruth roworiented = baij->roworiented; 646 int ierr,i,j,ii,jj,row,col; 647 int rstart=baij->rstart ; 648 int 
rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 649 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 650 PetscReal tmp; 651 MatScalar **HD = baij->hd,*baij_a; 652 MatScalar *v_t,*value; 653 #if defined(PETSC_USE_BOPT_g) 654 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 655 #endif 656 657 PetscFunctionBegin; 658 659 if (roworiented) { 660 stepval = (n-1)*bs; 661 } else { 662 stepval = (m-1)*bs; 663 } 664 for (i=0; i<m; i++) { 665 #if defined(PETSC_USE_BOPT_g) 666 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 667 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 668 #endif 669 row = im[i]; 670 v_t = v + i*bs2; 671 if (row >= rstart && row < rend) { 672 for (j=0; j<n; j++) { 673 col = in[j]; 674 675 /* Look up into the Hash Table */ 676 key = row*Nbs+col+1; 677 h1 = HASH(size,key,tmp); 678 679 idx = h1; 680 #if defined(PETSC_USE_BOPT_g) 681 total_ct++; 682 insert_ct++; 683 if (HT[idx] != key) { 684 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 685 if (idx == size) { 686 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 687 if (idx == h1) { 688 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 689 } 690 } 691 } 692 #else 693 if (HT[idx] != key) { 694 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 695 if (idx == size) { 696 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 697 if (idx == h1) { 698 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table"); 699 } 700 } 701 } 702 #endif 703 baij_a = HD[idx]; 704 if (roworiented) { 705 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 706 /* value = v + (i*(stepval+bs)+j)*bs; */ 707 value = v_t; 708 v_t += bs; 709 if (addv == ADD_VALUES) { 710 for (ii=0; ii<bs; ii++,value+=stepval) { 711 for (jj=ii; jj<bs2; jj+=bs) { 712 baij_a[jj] += *value++; 713 } 714 } 715 } else { 716 for (ii=0; ii<bs; ii++,value+=stepval) { 717 for (jj=ii; jj<bs2; jj+=bs) { 718 baij_a[jj] = *value++; 719 } 720 } 
721 } 722 } else { 723 value = v + j*(stepval+bs)*bs + i*bs; 724 if (addv == ADD_VALUES) { 725 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 726 for (jj=0; jj<bs; jj++) { 727 baij_a[jj] += *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 732 for (jj=0; jj<bs; jj++) { 733 baij_a[jj] = *value++; 734 } 735 } 736 } 737 } 738 } 739 } else { 740 if (!baij->donotstash) { 741 if (roworiented) { 742 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743 } else { 744 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 745 } 746 } 747 } 748 } 749 #if defined(PETSC_USE_BOPT_g) 750 baij->ht_total_ct = total_ct; 751 baij->ht_insert_ct = insert_ct; 752 #endif 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetValues_MPIBAIJ" 758 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 759 { 760 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 761 int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 762 int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 763 764 PetscFunctionBegin; 765 for (i=0; i<m; i++) { 766 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 767 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 768 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 769 row = idxm[i] - bsrstart; 770 for (j=0; j<n; j++) { 771 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 772 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 773 if (idxn[j] >= bscstart && idxn[j] < bscend){ 774 col = idxn[j] - bscstart; 775 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } else { 777 if (!baij->colmap) { 778 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 779 } 780 #if defined (PETSC_USE_CTABLE) 781 ierr = 
PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 782 data --; 783 #else 784 data = baij->colmap[idxn[j]/bs]-1; 785 #endif 786 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 787 else { 788 col = data + idxn[j]%bs; 789 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 790 } 791 } 792 } 793 } else { 794 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatNorm_MPIBAIJ" 802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 803 { 804 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 805 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 806 int ierr,i,bs2=baij->bs2; 807 PetscReal sum = 0.0; 808 MatScalar *v; 809 810 PetscFunctionBegin; 811 if (baij->size == 1) { 812 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 813 } else { 814 if (type == NORM_FROBENIUS) { 815 v = amat->a; 816 for (i=0; i<amat->nz*bs2; i++) { 817 #if defined(PETSC_USE_COMPLEX) 818 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 819 #else 820 sum += (*v)*(*v); v++; 821 #endif 822 } 823 v = bmat->a; 824 for (i=0; i<bmat->nz*bs2; i++) { 825 #if defined(PETSC_USE_COMPLEX) 826 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 827 #else 828 sum += (*v)*(*v); v++; 829 #endif 830 } 831 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 832 *nrm = sqrt(*nrm); 833 } else { 834 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 835 } 836 } 837 PetscFunctionReturn(0); 838 } 839 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 
847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 857 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 858 int *HT,key; 859 MatScalar **HD; 860 PetscReal tmp; 861 #if defined(PETSC_USE_BOPT_g) 862 int ct=0,max=0; 863 #endif 864 865 PetscFunctionBegin; 866 baij->ht_size=(int)(factor*nz); 867 size = baij->ht_size; 868 869 if (baij->ht) { 870 PetscFunctionReturn(0); 871 } 872 873 /* Allocate Memory for Hash Table */ 874 ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 875 baij->ht = (int*)(baij->hd + size); 876 HD = baij->hd; 877 HT = baij->ht; 878 879 880 ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr); 881 882 883 /* Loop Over A */ 884 for (i=0; i<a->mbs; i++) { 885 for (j=ai[i]; j<ai[i+1]; j++) { 886 row = i+rstart; 887 col = aj[j]+cstart; 888 889 key = row*Nbs + col + 1; 890 h1 = HASH(size,key,tmp); 891 for (k=0; k<size; k++){ 892 if (HT[(h1+k)%size] == 0.0) { 893 HT[(h1+k)%size] = key; 894 HD[(h1+k)%size] = a->a + j*bs2; 895 break; 896 #if defined(PETSC_USE_BOPT_g) 897 } else { 898 ct++; 899 #endif 900 } 901 } 902 #if defined(PETSC_USE_BOPT_g) 903 if (k> max) max = k; 904 #endif 905 } 906 } 907 /* Loop Over B */ 908 for (i=0; i<b->mbs; i++) { 909 for (j=bi[i]; j<bi[i+1]; j++) { 910 row = i+rstart; 911 col = garray[bj[j]]; 912 key = row*Nbs + col + 1; 913 h1 = HASH(size,key,tmp); 914 for (k=0; k<size; k++){ 915 if (HT[(h1+k)%size] == 0.0) { 916 HT[(h1+k)%size] = key; 917 HD[(h1+k)%size] = b->a + j*bs2; 918 break; 919 #if defined(PETSC_USE_BOPT_g) 920 } else { 921 ct++; 922 #endif 923 } 924 } 925 #if 
defined(PETSC_USE_BOPT_g) 926 if (k> max) max = k; 927 #endif 928 } 929 } 930 931 /* Print Summary */ 932 #if defined(PETSC_USE_BOPT_g) 933 for (i=0,j=0; i<size; i++) { 934 if (HT[i]) {j++;} 935 } 936 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 937 #endif 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNCT__ 942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 944 { 945 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 946 int ierr,nstash,reallocs; 947 InsertMode addv; 948 949 PetscFunctionBegin; 950 if (baij->donotstash) { 951 PetscFunctionReturn(0); 952 } 953 954 /* make sure all processors are either in INSERTMODE or ADDMODE */ 955 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 956 if (addv == (ADD_VALUES|INSERT_VALUES)) { 957 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 958 } 959 mat->insertmode = addv; /* in case this processor had no cache */ 960 961 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 962 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 963 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 964 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 965 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 966 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 967 PetscFunctionReturn(0); 968 } 969 970 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 973 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 974 { 975 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 976 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 977 int 
i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 978 int *row,*col,other_disassembled; 979 PetscTruth r1,r2,r3; 980 MatScalar *val; 981 InsertMode addv = mat->insertmode; 982 #if defined(PETSC_HAVE_DSCPACK) 983 PetscTruth flag; 984 #endif 985 986 PetscFunctionBegin; 987 if (!baij->donotstash) { 988 while (1) { 989 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. Since the values are stashed column-oriented, 1004 set the roworiented flag to column oriented, and after MatSetValues() 1005 restore the original flags */ 1006 r1 = baij->roworiented; 1007 r2 = a->roworiented; 1008 r3 = b->roworiented; 1009 baij->roworiented = PETSC_FALSE; 1010 a->roworiented = PETSC_FALSE; 1011 b->roworiented = PETSC_FALSE; 1012 while (1) { 1013 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014 if (!flg) break; 1015 1016 for (i=0; i<n;) { 1017 /* Now identify the consecutive vals belonging to the same row */ 1018 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019 if (j < n) ncols = j-i; 1020 else ncols = n-i; 1021 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022 i = j; 1023 } 1024 } 1025 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026 baij->roworiented = r1; 1027 a->roworiented = r2; 1028 b->roworiented = r3; 1029 } 1030 1031 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032 ierr = 
MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033 1034 /* determine if any processor has disassembled, if so we must 1035 also disassemble ourselfs, in order that we may reassemble. */ 1036 /* 1037 if nonzero structure of submatrix B cannot change then we know that 1038 no processor disassembled thus we can skip this stuff 1039 */ 1040 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042 if (mat->was_assembled && !other_disassembled) { 1043 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 } 1046 1047 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1049 } 1050 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1052 1053 #if defined(PETSC_USE_BOPT_g) 1054 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1055 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1056 baij->ht_total_ct = 0; 1057 baij->ht_insert_ct = 0; 1058 } 1059 #endif 1060 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1061 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1062 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1063 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1064 } 1065 1066 if (baij->rowvalues) { 1067 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068 baij->rowvalues = 0; 1069 } 1070 #if defined(PETSC_HAVE_DSCPACK) 1071 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1072 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); } 1073 #endif 1074 PetscFunctionReturn(0); 1075 } 1076 1077 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1081 static int 
MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  /* For the INFO-style ASCII formats only summary data is printed; return early */
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  }

  /* Nothing to draw on a null draw context */
  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* Uniprocessor: view the local diagonal block directly */
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *v;

    /* rank 0 holds everything; the other ranks contribute empty local parts */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A (diagonal block) part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; v = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals enumerates the bs scalar rows of block-row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
          col++; v += bs;
        }
      }
    }
    /* copy over the B (off-diagonal block) part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; v = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
          col++; v += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
/* Dispatches viewing to the ASCII/draw/socket/binary helper; rejects other viewers. */
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/* Releases all storage owned by the parallel BAIJ matrix. */
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr =
PetscFree(baij->rowners);CHKERRQ(ierr); 1225 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1226 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1227 #if defined (PETSC_USE_CTABLE) 1228 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1229 #else 1230 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1231 #endif 1232 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1233 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1234 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1235 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1236 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1237 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1238 #if defined(PETSC_USE_MAT_SINGLE) 1239 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1240 #endif 1241 ierr = PetscFree(baij);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatMult_MPIBAIJ" 1247 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1248 { 1249 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1250 int ierr,nt; 1251 1252 PetscFunctionBegin; 1253 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1254 if (nt != A->n) { 1255 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1256 } 1257 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1258 if (nt != A->m) { 1259 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1260 } 1261 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1262 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1264 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1265 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ 
"MatMultAdd_MPIBAIJ" 1271 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274 int ierr; 1275 1276 PetscFunctionBegin; 1277 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1278 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1279 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1280 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1286 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1287 { 1288 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1289 int ierr; 1290 1291 PetscFunctionBegin; 1292 /* do nondiagonal part */ 1293 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1294 /* send it on its way */ 1295 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1296 /* do local part */ 1297 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1298 /* receive remote parts: note this assumes the values are not actually */ 1299 /* inserted in yy until the next line, which is true for my implementation*/ 1300 /* but is not perhaps always true. 
*/ 1301 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1307 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1310 int ierr; 1311 1312 PetscFunctionBegin; 1313 /* do nondiagonal part */ 1314 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1315 /* send it on its way */ 1316 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1317 /* do local part */ 1318 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1319 /* receive remote parts: note this assumes the values are not actually */ 1320 /* inserted in yy until the next line, which is true for my implementation*/ 1321 /* but is not perhaps always true. */ 1322 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* 1327 This only works correctly for square matrices where the subblock A->A is the 1328 diagonal block 1329 */ 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1332 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1333 { 1334 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1335 int ierr; 1336 1337 PetscFunctionBegin; 1338 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1339 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatScale_MPIBAIJ" 1345 int MatScale_MPIBAIJ(PetscScalar *aa,Mat A) 1346 { 1347 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1348 int ierr; 1349 1350 PetscFunctionBegin; 1351 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1352 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1358 int MatGetRow_MPIBAIJ(Mat matin,int 
row,int *nz,int **idx,PetscScalar **v) 1359 { 1360 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1361 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1362 int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1363 int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1364 int *cmap,*idx_p,cstart = mat->cstart; 1365 1366 PetscFunctionBegin; 1367 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1368 mat->getrowactive = PETSC_TRUE; 1369 1370 if (!mat->rowvalues && (idx || v)) { 1371 /* 1372 allocate enough space to hold information from the longest row. 1373 */ 1374 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1375 int max = 1,mbs = mat->mbs,tmp; 1376 for (i=0; i<mbs; i++) { 1377 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1378 if (max < tmp) { max = tmp; } 1379 } 1380 ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1381 mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1382 } 1383 1384 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1385 lrow = row - brstart; 1386 1387 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1388 if (!v) {pvA = 0; pvB = 0;} 1389 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1390 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1391 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1392 nztot = nzA + nzB; 1393 1394 cmap = mat->garray; 1395 if (v || idx) { 1396 if (nztot) { 1397 /* Sort by increasing column numbers, assuming A and B already sorted */ 1398 int imark = -1; 1399 if (v) { 1400 *v = v_p = mat->rowvalues; 1401 for (i=0; i<nzB; i++) { 1402 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1403 else break; 1404 } 1405 imark = i; 1406 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1407 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1408 } 1409 if (idx) { 1410 *idx = idx_p = mat->rowindices; 1411 if 
(imark > -1) { 1412 for (i=0; i<imark; i++) { 1413 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1414 } 1415 } else { 1416 for (i=0; i<nzB; i++) { 1417 if (cmap[cworkB[i]/bs] < cstart) 1418 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1419 else break; 1420 } 1421 imark = i; 1422 } 1423 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1424 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1425 } 1426 } else { 1427 if (idx) *idx = 0; 1428 if (v) *v = 0; 1429 } 1430 } 1431 *nz = nztot; 1432 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1433 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1439 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1440 { 1441 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1442 1443 PetscFunctionBegin; 1444 if (baij->getrowactive == PETSC_FALSE) { 1445 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1446 } 1447 baij->getrowactive = PETSC_FALSE; 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ" 1453 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1454 { 1455 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1456 1457 PetscFunctionBegin; 1458 *bs = baij->bs; 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1464 int MatZeroEntries_MPIBAIJ(Mat A) 1465 { 1466 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1467 int ierr; 1468 1469 PetscFunctionBegin; 1470 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1471 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1477 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1478 { 1479 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1480 Mat A = a->A,B 
= a->B; 1481 int ierr; 1482 PetscReal isend[5],irecv[5]; 1483 1484 PetscFunctionBegin; 1485 info->block_size = (PetscReal)a->bs; 1486 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1487 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1488 isend[3] = info->memory; isend[4] = info->mallocs; 1489 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1490 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1491 isend[3] += info->memory; isend[4] += info->mallocs; 1492 if (flag == MAT_LOCAL) { 1493 info->nz_used = isend[0]; 1494 info->nz_allocated = isend[1]; 1495 info->nz_unneeded = isend[2]; 1496 info->memory = isend[3]; 1497 info->mallocs = isend[4]; 1498 } else if (flag == MAT_GLOBAL_MAX) { 1499 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1500 info->nz_used = irecv[0]; 1501 info->nz_allocated = irecv[1]; 1502 info->nz_unneeded = irecv[2]; 1503 info->memory = irecv[3]; 1504 info->mallocs = irecv[4]; 1505 } else if (flag == MAT_GLOBAL_SUM) { 1506 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1507 info->nz_used = irecv[0]; 1508 info->nz_allocated = irecv[1]; 1509 info->nz_unneeded = irecv[2]; 1510 info->memory = irecv[3]; 1511 info->mallocs = irecv[4]; 1512 } else { 1513 SETERRQ1(1,"Unknown MatInfoType argument %d",flag); 1514 } 1515 info->rows_global = (PetscReal)A->M; 1516 info->columns_global = (PetscReal)A->N; 1517 info->rows_local = (PetscReal)A->m; 1518 info->columns_local = (PetscReal)A->N; 1519 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1520 info->fill_ratio_needed = 0; 1521 info->factor_mallocs = 0; 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1527 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1528 { 1529 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1530 int ierr; 1531 1532 PetscFunctionBegin; 1533 switch (op) { 1534 case 
MAT_NO_NEW_NONZERO_LOCATIONS: 1535 case MAT_YES_NEW_NONZERO_LOCATIONS: 1536 case MAT_COLUMNS_UNSORTED: 1537 case MAT_COLUMNS_SORTED: 1538 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1539 case MAT_KEEP_ZEROED_ROWS: 1540 case MAT_NEW_NONZERO_LOCATION_ERR: 1541 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1542 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1543 break; 1544 case MAT_ROW_ORIENTED: 1545 a->roworiented = PETSC_TRUE; 1546 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1547 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1548 break; 1549 case MAT_ROWS_SORTED: 1550 case MAT_ROWS_UNSORTED: 1551 case MAT_YES_NEW_DIAGONALS: 1552 case MAT_USE_SINGLE_PRECISION_SOLVES: 1553 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1554 break; 1555 case MAT_COLUMN_ORIENTED: 1556 a->roworiented = PETSC_FALSE; 1557 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1558 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1559 break; 1560 case MAT_IGNORE_OFF_PROC_ENTRIES: 1561 a->donotstash = PETSC_TRUE; 1562 break; 1563 case MAT_NO_NEW_DIAGONALS: 1564 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1565 case MAT_USE_HASH_TABLE: 1566 a->ht_flag = PETSC_TRUE; 1567 break; 1568 default: 1569 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1570 } 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1576 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1579 Mat_SeqBAIJ *Aloc; 1580 Mat B; 1581 int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1582 int bs=baij->bs,mbs=baij->mbs; 1583 MatScalar *a; 1584 1585 PetscFunctionBegin; 1586 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1587 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1588 1589 /* copy over the A part */ 1590 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1591 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1592 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 1593 
1594 for (i=0; i<mbs; i++) { 1595 rvals[0] = bs*(baij->rstart + i); 1596 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1597 for (j=ai[i]; j<ai[i+1]; j++) { 1598 col = (baij->cstart+aj[j])*bs; 1599 for (k=0; k<bs; k++) { 1600 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1601 col++; a += bs; 1602 } 1603 } 1604 } 1605 /* copy over the B part */ 1606 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1607 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1608 for (i=0; i<mbs; i++) { 1609 rvals[0] = bs*(baij->rstart + i); 1610 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1611 for (j=ai[i]; j<ai[i+1]; j++) { 1612 col = baij->garray[aj[j]]*bs; 1613 for (k=0; k<bs; k++) { 1614 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1615 col++; a += bs; 1616 } 1617 } 1618 } 1619 ierr = PetscFree(rvals);CHKERRQ(ierr); 1620 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1621 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1622 1623 if (matout) { 1624 *matout = B; 1625 } else { 1626 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1633 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1634 { 1635 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1636 Mat a = baij->A,b = baij->B; 1637 int ierr,s1,s2,s3; 1638 1639 PetscFunctionBegin; 1640 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1641 if (rr) { 1642 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1643 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1644 /* Overlap communication with computation. 
*/ 1645 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1646 } 1647 if (ll) { 1648 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1649 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1650 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1651 } 1652 /* scale the diagonal block */ 1653 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1654 1655 if (rr) { 1656 /* Do a scatter end and then right scale the off-diagonal block */ 1657 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1658 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1659 } 1660 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1666 int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1669 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1670 int *procs,*nprocs,j,idx,nsends,*work,row; 1671 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1672 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1673 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1674 MPI_Comm comm = A->comm; 1675 MPI_Request *send_waits,*recv_waits; 1676 MPI_Status recv_status,*send_status; 1677 IS istmp; 1678 PetscTruth found; 1679 1680 PetscFunctionBegin; 1681 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1682 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1683 1684 /* first count number of contributors to each processor */ 1685 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 1686 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1687 procs = nprocs + size; 1688 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 1689 for (i=0; i<N; i++) { 1690 idx = rows[i]; 1691 found = PETSC_FALSE; 1692 for (j=0; j<size; j++) { 1693 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1694 nprocs[j]++; 
procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 1695 } 1696 } 1697 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1698 } 1699 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1700 1701 /* inform other processors of number of messages and max length*/ 1702 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 1703 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1704 nmax = work[rank]; 1705 nrecvs = work[size+rank]; 1706 ierr = PetscFree(work);CHKERRQ(ierr); 1707 1708 /* post receives: */ 1709 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 1710 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1711 for (i=0; i<nrecvs; i++) { 1712 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1713 } 1714 1715 /* do sends: 1716 1) starts[i] gives the starting index in svalues for stuff going to 1717 the ith processor 1718 */ 1719 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 1720 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1721 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 1722 starts[0] = 0; 1723 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1724 for (i=0; i<N; i++) { 1725 svalues[starts[owner[i]]++] = rows[i]; 1726 } 1727 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1728 1729 starts[0] = 0; 1730 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1731 count = 0; 1732 for (i=0; i<size; i++) { 1733 if (procs[i]) { 1734 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1735 } 1736 } 1737 ierr = PetscFree(starts);CHKERRQ(ierr); 1738 1739 base = owners[rank]*bs; 1740 1741 /* wait on receives */ 1742 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 1743 source = lens + nrecvs; 1744 count = nrecvs; slen = 0; 1745 while (count) { 1746 
ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1747 /* unpack receives into our local space */ 1748 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1749 source[imdex] = recv_status.MPI_SOURCE; 1750 lens[imdex] = n; 1751 slen += n; 1752 count--; 1753 } 1754 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1755 1756 /* move the data into the send scatter */ 1757 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 1758 count = 0; 1759 for (i=0; i<nrecvs; i++) { 1760 values = rvalues + i*nmax; 1761 for (j=0; j<lens[i]; j++) { 1762 lrows[count++] = values[j] - base; 1763 } 1764 } 1765 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1766 ierr = PetscFree(lens);CHKERRQ(ierr); 1767 ierr = PetscFree(owner);CHKERRQ(ierr); 1768 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1769 1770 /* actually zap the local rows */ 1771 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1772 PetscLogObjectParent(A,istmp); 1773 1774 /* 1775 Zero the required rows. If the "diagonal block" of the matrix 1776 is square and the user wishes to set the diagonal we use seperate 1777 code so that MatSetValues() is not called for each diagonal allocating 1778 new memory, thus calling lots of mallocs and slowing things down. 
1779 1780 Contributed by: Mathew Knepley 1781 */ 1782 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1783 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1784 if (diag && (l->A->M == l->A->N)) { 1785 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1786 } else if (diag) { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1788 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1789 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1790 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1791 } 1792 for (i=0; i<slen; i++) { 1793 row = lrows[i] + rstart_bs; 1794 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1795 } 1796 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1798 } else { 1799 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1800 } 1801 1802 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1803 ierr = PetscFree(lrows);CHKERRQ(ierr); 1804 1805 /* wait on sends */ 1806 if (nsends) { 1807 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1808 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1809 ierr = PetscFree(send_status);CHKERRQ(ierr); 1810 } 1811 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1812 ierr = PetscFree(svalues);CHKERRQ(ierr); 1813 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1819 int MatPrintHelp_MPIBAIJ(Mat A) 1820 { 1821 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1822 MPI_Comm comm = A->comm; 1823 static int called = 0; 1824 int ierr; 1825 1826 PetscFunctionBegin; 1827 if (!a->rank) { 1828 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1829 } 1830 if (called) {PetscFunctionReturn(0);} else called = 1; 1831 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the 
defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
/* Clears the factored state by forwarding to the local diagonal block. */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
/*
   Tests two MPIBAIJ matrices for equality: compares the local diagonal (A)
   and off-diagonal (B) blocks on each process, then combines the per-process
   results with a logical AND over the communicator so every process returns
   the same answer.
*/
int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal parts when the diagonal parts already match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
/* Default preallocation: block size 1 with PETSc-chosen nonzero estimates. */
int MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ type; the order of the entries is fixed
   by struct _MatOps, and a 0 entry marks an operation that is not
   implemented for this type. */
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  MatDestroy_MPIBAIJ,
  MatView_MPIBAIJ,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPIBAIJ};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns the local diagonal block without copying; *iscopy is set to
   PETSC_FALSE so the caller knows not to destroy it. */
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/* Constructor for the MPIBAIJ type: allocates and zeroes the type-specific
   data, installs the function table, and initializes the per-process state
   (stash, hash-table options, composed operations). */
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  int         ierr;
  PetscTruth  flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = 
MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds
     rowners, cowners and rowners_bs back to back */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optional hash-table assembly; a factor <= 1.0 falls back to the default */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs    - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:


   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIBAIJ *b;
  int         ierr,i;
  PetscTruth  flg2;

  PetscFunctionBegin;
  /* silently ignore matrices of any other type */
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  B->preallocated = PETSC_TRUE;
  /* the command line option overrides the bs argument */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* settle local/global sizes and create the row/column maps */
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  ierr = 
MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  /* turn per-process block-row counts into a prefix sum of ownership offsets */
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];

  ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* scaled (point-row) versions of the block ownership ranges */
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  /* diagonal block is m x n; off-diagonal block is m x N (all columns) */
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on one process fall back to the sequential BAIJ format */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/* Creates a new MPIBAIJ matrix with the same layout as matin; cpvalues
   controls whether the numerical values are copied as well. */
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
a->cstart_bs = oldmat->cstart_bs; 2358 a->cend_bs = oldmat->cend_bs; 2359 2360 /* hash table stuff */ 2361 a->ht = 0; 2362 a->hd = 0; 2363 a->ht_size = 0; 2364 a->ht_flag = oldmat->ht_flag; 2365 a->ht_fact = oldmat->ht_fact; 2366 a->ht_total_ct = 0; 2367 a->ht_insert_ct = 0; 2368 2369 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2370 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2371 ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2372 if (oldmat->colmap) { 2373 #if defined (PETSC_USE_CTABLE) 2374 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2375 #else 2376 ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr); 2377 PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2378 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2379 #endif 2380 } else a->colmap = 0; 2381 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2382 ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr); 2383 PetscLogObjectMemory(mat,len*sizeof(int)); 2384 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2385 } else a->garray = 0; 2386 2387 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2388 PetscLogObjectParent(mat,a->lvec); 2389 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2390 2391 PetscLogObjectParent(mat,a->Mvctx); 2392 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2393 PetscLogObjectParent(mat,a->A); 2394 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2395 PetscLogObjectParent(mat,a->B); 2396 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2397 *newmat = mat; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #include "petscsys.h" 2402 2403 EXTERN_C_BEGIN 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatLoad_MPIBAIJ" 2406 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat) 2407 { 2408 Mat 
A; 2409 int i,nz,ierr,j,rstart,rend,fd; 2410 PetscScalar *vals,*buf; 2411 MPI_Comm comm = ((PetscObject)viewer)->comm; 2412 MPI_Status status; 2413 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2414 int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2415 int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2416 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2417 int dcount,kmax,k,nzcount,tmp; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2421 2422 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2423 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2424 if (!rank) { 2425 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2426 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2427 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2428 if (header[3] < 0) { 2429 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ"); 2430 } 2431 } 2432 2433 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2434 M = header[1]; N = header[2]; 2435 2436 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2437 2438 /* 2439 This code adds extra rows to make sure the number of rows is 2440 divisible by the blocksize 2441 */ 2442 Mbs = M/bs; 2443 extra_rows = bs - M + bs*(Mbs); 2444 if (extra_rows == bs) extra_rows = 0; 2445 else Mbs++; 2446 if (extra_rows &&!rank) { 2447 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2448 } 2449 2450 /* determine ownership of all rows */ 2451 mbs = Mbs/size + ((Mbs % size) > rank); 2452 m = mbs*bs; 2453 ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2454 browners = rowners + size + 1; 2455 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2456 rowners[0] = 0; 2457 for (i=2; i<=size; i++) rowners[i] += 
rowners[i-1]; 2458 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2459 rstart = rowners[rank]; 2460 rend = rowners[rank+1]; 2461 2462 /* distribute row lengths to all processors */ 2463 ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr); 2464 if (!rank) { 2465 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2466 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2467 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2468 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2469 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2470 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2471 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2472 } else { 2473 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2474 } 2475 2476 if (!rank) { 2477 /* calculate the number of nonzeros on each processor */ 2478 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2479 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2480 for (i=0; i<size; i++) { 2481 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2482 procsnz[i] += rowlengths[j]; 2483 } 2484 } 2485 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2486 2487 /* determine max buffer needed and allocate it */ 2488 maxnz = 0; 2489 for (i=0; i<size; i++) { 2490 maxnz = PetscMax(maxnz,procsnz[i]); 2491 } 2492 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2493 2494 /* read in my part of the matrix column indices */ 2495 nz = procsnz[0]; 2496 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2497 mycols = ibuf; 2498 if (size == 1) nz -= extra_rows; 2499 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2500 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2501 2502 /* read in every ones (except the last) and ship off */ 2503 for (i=1; i<size-1; i++) { 2504 nz = procsnz[i]; 
2505 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2506 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2507 } 2508 /* read in the stuff for the last proc */ 2509 if (size != 1) { 2510 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2511 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2512 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2513 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2514 } 2515 ierr = PetscFree(cols);CHKERRQ(ierr); 2516 } else { 2517 /* determine buffer space needed for message */ 2518 nz = 0; 2519 for (i=0; i<m; i++) { 2520 nz += locrowlens[i]; 2521 } 2522 ierr = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr); 2523 mycols = ibuf; 2524 /* receive message of column indices*/ 2525 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2526 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2527 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2528 } 2529 2530 /* loop over local rows, determining number of off diagonal entries */ 2531 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2532 odlens = dlens + (rend-rstart); 2533 ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr); 2534 ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2535 masked1 = mask + Mbs; 2536 masked2 = masked1 + Mbs; 2537 rowcount = 0; nzcount = 0; 2538 for (i=0; i<mbs; i++) { 2539 dcount = 0; 2540 odcount = 0; 2541 for (j=0; j<bs; j++) { 2542 kmax = locrowlens[rowcount]; 2543 for (k=0; k<kmax; k++) { 2544 tmp = mycols[nzcount++]/bs; 2545 if (!mask[tmp]) { 2546 mask[tmp] = 1; 2547 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2548 else masked1[dcount++] = tmp; 2549 } 2550 } 2551 rowcount++; 2552 } 2553 2554 dlens[i] = dcount; 2555 odlens[i] = odcount; 2556 2557 /* zero out the mask elements we set */ 2558 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2559 for (j=0; j<odcount; 
j++) mask[masked2[j]] = 0; 2560 } 2561 2562 /* create our matrix */ 2563 ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2564 A = *newmat; 2565 MatSetOption(A,MAT_COLUMNS_SORTED); 2566 2567 if (!rank) { 2568 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2569 /* read in my part of the matrix numerical values */ 2570 nz = procsnz[0]; 2571 vals = buf; 2572 mycols = ibuf; 2573 if (size == 1) nz -= extra_rows; 2574 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2575 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2576 2577 /* insert into matrix */ 2578 jj = rstart*bs; 2579 for (i=0; i<m; i++) { 2580 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2581 mycols += locrowlens[i]; 2582 vals += locrowlens[i]; 2583 jj++; 2584 } 2585 /* read in other processors (except the last one) and ship out */ 2586 for (i=1; i<size-1; i++) { 2587 nz = procsnz[i]; 2588 vals = buf; 2589 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2590 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2591 } 2592 /* the last proc */ 2593 if (size != 1){ 2594 nz = procsnz[i] - extra_rows; 2595 vals = buf; 2596 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2597 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2598 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2599 } 2600 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2601 } else { 2602 /* receive numeric values */ 2603 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2604 2605 /* receive message of values*/ 2606 vals = buf; 2607 mycols = ibuf; 2608 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2609 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2610 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2611 2612 /* insert into matrix */ 2613 jj = 
rstart*bs; 2614 for (i=0; i<m; i++) { 2615 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2616 mycols += locrowlens[i]; 2617 vals += locrowlens[i]; 2618 jj++; 2619 } 2620 } 2621 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2622 ierr = PetscFree(buf);CHKERRQ(ierr); 2623 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2624 ierr = PetscFree(rowners);CHKERRQ(ierr); 2625 ierr = PetscFree(dlens);CHKERRQ(ierr); 2626 ierr = PetscFree(mask);CHKERRQ(ierr); 2627 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2628 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 PetscFunctionReturn(0); 2630 } 2631 EXTERN_C_END 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2635 /*@ 2636 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2637 2638 Input Parameters: 2639 . mat - the matrix 2640 . fact - factor 2641 2642 Collective on Mat 2643 2644 Level: advanced 2645 2646 Notes: 2647 This can also be set by the command line option: -mat_use_hash_table fact 2648 2649 .keywords: matrix, hashtable, factor, HT 2650 2651 .seealso: MatSetOption() 2652 @*/ 2653 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2654 { 2655 Mat_MPIBAIJ *baij; 2656 int ierr; 2657 PetscTruth flg; 2658 2659 PetscFunctionBegin; 2660 PetscValidHeaderSpecific(mat,MAT_COOKIE); 2661 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr); 2662 if (!flg) { 2663 SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only."); 2664 } 2665 baij = (Mat_MPIBAIJ*)mat->data; 2666 baij->ht_fact = fact; 2667 PetscFunctionReturn(0); 2668 } 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2672 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2673 { 2674 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2675 PetscFunctionBegin; 2676 *Ad = a->A; 2677 *Ao = a->B; 2678 *colmap = a->garray; 2679 PetscFunctionReturn(0); 2680 } 2681