#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
  MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}
#endif
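/*
   Editor's note (illustration, not part of the original source): both macros above
   locate the bs x bs block containing the scalar entry (row,col) and index into it
   in column-major order. For example, with bs = 2 the scalar entry (row,col) = (5,8)
   maps to block row brow = 5/2 = 2, block column bcol = 8/2 = 4, and offset
   (ridx,cidx) = (5%2,8%2) = (1,0), so the value lands at
   ap[bs2*_i + bs*cidx + ridx] = ap[4*_i + 1] once the search finds the block
   column _i with rp[_i] == 4. The while loop narrows the range by bisection and
   the final for loop scans the remaining (at most 4) entries linearly.
*/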
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macros */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = PETSC_NULL;
  MatScalar *v_loc = PETSC_NULL;

  PetscFunctionBegin;

  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue; /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */
          col  = in[j] - cstart_orig;                   /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
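/*
   Usage sketch (editor's note; indices and values are hypothetical): since only
   the upper triangle is stored, lower triangular input is dropped or rejected
   depending on MAT_IGNORE_LOWER_TRIANGULAR.

     PetscInt    i = 0, j = 2;
     PetscScalar v = 3.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- stored in A or B
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- silently ignored
*/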
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
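/*
   Usage sketch for the blocked path (editor's note; sizes hypothetical): for
   bs = 2 one bs*bs block of values is passed per (block row, block column)
   pair, here the upper triangular block (0,1):

     PetscInt    ib = 0, jb = 1;
     PetscScalar vblock[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,INSERT_VALUES);CHKERRQ(ierr);

   The routine above stages each block in baij->barray so that the sequential
   MatSetValuesBlocked_SeqSBAIJ() and _SeqBAIJ() always receive a contiguous
   bs2-length array regardless of the caller's row/column orientation.
*/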
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap.N;
      ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap.N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
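/*
   Editor's note on the Frobenius branch above: with D_p the locally owned
   diagonal portion (SBAIJ, upper triangle) and B_p the off-diagonal part on
   process p, each off-diagonal block of the full symmetric matrix appears
   twice, hence

       ||M||_F = sqrt( sum_p ( ||D_p||_F^2 + 2 ||B_p||_F^2 ) )

   assuming MatNorm() on the sequential SBAIJ block already accounts for the
   mirrored entries inside the diagonal portion.
*/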
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
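/*
   Typical calling sequence driving the two assembly routines above (editor's
   sketch; loop bounds and arrays are hypothetical):

     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(mat,1,&i,nc,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process entries cached in mat->stash and mat->bstash by MatSetValues()
   are communicated in MatAssemblyBegin() and drained row by row here in
   MatAssemblyEnd().
*/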
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap.bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
  CHKMEMQ;
  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  CHKMEMQ;
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  CHKMEMQ;
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  CHKMEMQ;
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
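/*
   Editor's note (reading of the code above): writing the symmetric matrix as
   M = D + U + U^T with D = a->A (upper triangle of the diagonal block) and
   U = a->B, the multiply computes

       y = D x_loc + U x_ghost + U^T x_loc

   with a single scatter: slvec0 = [x_loc ; U^T x_loc] is scatter-added into
   slvec1 = [D x_loc ; 0], so slvec1a accumulates the transposed contributions
   from other processes while slvec1b receives the ghost values of x that the
   final multadd with a->B folds back into yy.
*/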
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
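/*
   Editor's note: the merge above returns the row in globally increasing column
   order. With cstart the first owned block column, the layout is

     [ B entries with global block col < cstart | all A entries | remaining B entries ]

   where imark counts the B entries that precede the owned diagonal block.
*/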
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
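/*
   Editor's note: because only the upper triangle is stored, only the symmetric
   scaling M' = diag(ll) M diag(ll) is supported (ll == rr is enforced above);
   the owned diagonal block a is scaled on both sides, while the off-diagonal
   part b receives ll from the left and the scattered copy of ll (baij->lvec)
   from the right.
*/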
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ*)xx->A->data;
    ya  = (Mat_SeqSBAIJ*)yy->A->data;
    bnz = (PetscBLASInt)xa->nz;
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ*)xx->B->data;
    yb  = (Mat_SeqBAIJ*)yy->B->data;
    bnz = (PetscBLASInt)xb->nz;
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*45*/ 0,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMaxAbs_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0,
       0,
/*100*/0,
       0,
       0,
       0,
       0,
/*105*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = bs;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->rmap.n/bs;
  Mbs = B->rmap.N/bs;
  if (mbs*bs != B->rmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
  }

  B->rmap.bs = bs;
  b->bs2     = bs*bs;
  b->mbs     = mbs;
  b->nbs     = mbs;
  b->Mbs     = Mbs;
  b->Nbs     = Mbs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap.range[i]/bs;
  }
  b->rstartbs = B->rmap.rstart/bs;
  b->rendbs   = B->rmap.rend/bs;
  b->cstartbs = B->cmap.rstart/bs;
  b->cendbs   = B->cmap.rend/bs;
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  /* off-diagonal portion: a sequential BAIJ matrix */
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ
M*/
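/*
   Creation sketch (illustrative only; comm, M, and bs below are placeholder
   names chosen for this example, not part of the library):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/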
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off-processor entries formed during assembly */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;
  ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                     "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                     MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
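   Example usage:
   A sketch only (the matrix A and the counts below are illustrative): with
   bs=1 and the three local rows depicted above, a dense diagonal block needs
   at most d_nnz = {3,2,1} stored entries per row (upper triangle including
   the diagonal), while o_nnz counts the stored entries to the right of the
   diagonal block,

.vb
      PetscInt d_nnz[3] = {3,2,1},o_nnz[3] = {6,6,6};
      ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve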
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by the block size.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).
.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
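   Example usage:
   A sketch only (comm, nloc, and the preallocation counts are illustrative
   placeholders): create a block size 2 matrix with nloc block rows on each
   process, letting PETSc determine the global sizes,

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(comm,2,2*nloc,2*nloc,PETSC_DETERMINE,PETSC_DETERMINE,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve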
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are windows onto the first bs*mbs entries and
     the remaining entries of slvec1 (slvec0), so they share the duplicated arrays */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  /* duplicate the composed function list from the original matrix into the copy */
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;   /* padded rows get a single (diagonal) entry */
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices for every other processor (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;   /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {   /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens  = dlens + (rend-rstart);
  ierr    = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr    = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block col. index */
        if (!mask[tmp]) {   /* first time this block column appears in this block row */
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;   /* entry in off-diag portion */
          else                             masked1[dcount++]  = tmp;   /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;    /* d_nnz[i] */
    odlens[i] = odcount;   /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }   /* unit diagonal for padded rows */

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
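/*
   Illustrative call sequence (viewer, A, and the file name are placeholders,
   not part of this file): a matrix saved with MatView() in PETSc binary
   format can be read back as an MPISBAIJ matrix with

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   with -matload_block_size <bs> selecting the block size used above.
*/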
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->rmap.bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap.range;

  /* each proc creates an array to be distributed; since only the upper triangle
     is stored, entries of B contribute both to this process's row maxima (va)
     and, by symmetry, to the row maxima of the processes owning those columns (work) */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;            /* local col index */
          col += rowners_bs[rank+1];     /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      /* a single scatter gathers both contributions: the -B^T*xx terms are added
         into slvec1a (giving bb - B^T*xx) and the needed ghost values of -xx land in slvec1b */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap.bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
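/*
   Note (illustrative): the relaxation kernels above are normally reached
   through the generic MatRelax() interface rather than called directly;
   assuming MatRelax() mirrors the ops->relax calling sequence used above,
   a caller would write, e.g.,

      ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,its,lits,x);CHKERRQ(ierr);

   with A of type MATMPISBAIJ.
*/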