#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/* UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar    MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar           MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar    MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar        MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
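/*
   A minimal usage sketch (illustrative only, not part of the library) for the
   store/retrieve pair above.  Once the nonzero pattern is frozen, the current
   entries can be snapshotted before they are modified and restored afterwards;
   `mat' is an assumed, already assembled MATMPISBAIJ matrix.

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      - snapshot the current entries
         ...                                        - modify the values, solve, etc.
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   - restore the snapshot
*/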
#define CHUNKSIZE 10

#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#ifndef MatSetValues_SeqBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
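/*
   Illustrative sketch (an assumption, not used by the library): the insertion
   scheme the two macros above implement, reduced to a plain sorted-index
   array with ADD_VALUES semantics.  A short binary search narrows the window,
   a linear scan finds the insertion point, and later entries are shifted up
   by one slot.
*/
static PetscErrorCode MatSetValuesSketch_BinaryInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt rmax,PetscInt bcol,PetscScalar value)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  PetscFunctionBegin;
  while (high-low > 3) {              /* binary search on the sorted columns */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > bcol) break;        /* passed the spot: must insert here */
    if (cols[i] == bcol) {            /* already present: just accumulate */
      vals[i] += value;
      PetscFunctionReturn(0);
    }
  }
  if (*nrow >= rmax) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"No room left in this row");
  for (ii=*nrow-1; ii>=i; ii--) {     /* shift up all the later entries */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = bcol;
  vals[i] = value;
  (*nrow)++;
  PetscFunctionReturn(0);
}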
/*
    Only add/insert a(i,j) with i<=j (blocks).
    Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required in the macros */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc,*in_loc=0;
  MatScalar *v_loc=0;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;                     /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
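/*
   Worked example for the packing loop above (illustrative).  With bs = 2 and
   a row-oriented input of m = 1, n = 2 blocks, v holds the 2x4 logical array
   row-major:

        v = [a00 a01 b00 b01 a10 a11 b10 b11]

   and stepval = (n-1)*bs = 2.  Block j starts at value = v + j*bs; after each
   row of bs entries the pointer skips stepval entries to reach the next row,
   so block 0 packs {v[0],v[1],v[4],v[5]} and block 1 packs
   {v[2],v[3],v[6],v[7]} into barray.
*/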
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) {
            *(v+i*n+j) = 0.0;
          } else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;  /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;  /* square of norm(B) */
      ierr  = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr  = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
      PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->N;
      ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
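/*
   The Frobenius reduction above relies on the symmetric storage: each process
   holds the symmetric diagonal block A (whose local norm already accounts for
   its unstored lower triangle) plus the strictly off-diagonal block B, whose
   entries also stand in for their unstored transposes.  Hence, as an
   explanatory identity (not a library formula),

        ||M||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is exactly *norm = sqrt(sum[0] + 2*sum[1]) after the MPI_Allreduce.
*/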
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a    = (Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)    {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray)    {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd)        {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
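/*
   The multiply above computes y = M*x for the symmetric M from its stored
   pieces: the local symmetric diagonal block A and the strictly upper
   off-diagonal block B.  As an explanatory identity (derived from the code,
   not a separate library formula),

        y = A*x_loc + B*x_ghost + B^T*x_loc

   where the B^T contribution (the unstored lower triangle) is produced by the
   multtranspose into slvec0b and delivered to the owning processes by the
   single slvec0 -> slvec1 scatter with ADD_VALUES.
*/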
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
    PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
    PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
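/*
   A minimal usage sketch (illustrative only) for the row-access routines
   above.  For the sbaij format MatGetRowUpperTriangular() must bracket the
   row loop; `mat', rstart and rend are assumed to come from the caller.

     PetscInt    nz,*cols,row;
     PetscScalar *vals;

     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
           ...  use the nz entries cols[]/vals[] of this local row  ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/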
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscVerboseInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_TRUE;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_FALSE;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
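/*
   Usage sketch for the lower-triangular options handled above (illustrative
   only): by default, setting an entry below the block diagonal is an error;
   with

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);

   (or the command line option -mat_ignore_lower_triangular) such entries are
   silently dropped, so a code that generates the full symmetric pattern can
   still assemble into the sbaij format unchanged.
*/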
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A,b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ*)A->data;
  MPI_Comm          comm = A->comm;
  static PetscTruth called = PETSC_FALSE;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa  = (Mat_SeqSBAIJ*)xx->A->data;
    ya  = (Mat_SeqSBAIJ*)yy->A->data;
    bnz = (PetscBLASInt)xa->nz;
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb  = (Mat_SeqBAIJ*)xx->B->data;
    yb  = (Mat_SeqBAIJ*)yy->B->data;
    bnz = (PetscBLASInt)xb->nz;
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
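/*
   A minimal sketch (illustrative; the index values are assumptions) showing
   the symmetric-submatrix restriction enforced above: irow[i] and icol[i]
   must be equal, e.g. the same stride IS for rows and columns.

     IS  is;
     Mat *submat;

     ierr = ISCreateStride(PETSC_COMM_SELF,5,0,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(mat,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
         ...
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/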
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0,
       0,
/*100*/0,
       0,
       0,
       0,
       0,
/*105*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ*)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscInt       i,mbs,Mbs;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
  if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  B->preallocated = PETSC_TRUE;
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);

  b   = (Mat_MPISBAIJ*)B->data;
  mbs = B->m/bs;
  Mbs = B->M/bs;
  if (mbs*bs != B->m) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by block size %D",B->m,bs);
  }

  B->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = mbs;
  b->Mbs = Mbs;
  b->Nbs = Mbs;

  ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  b->cstart = b->rstart;
  b->cend   = b->rend;
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);

  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.

   Options Database Keys:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ()
M*/
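/*
   Illustrative creation sketch for this type (example sizes and nonzero
   estimates; error checking abbreviated).  Because only the upper triangle
   is stored, MatSetValues() calls need only supply entries with column
   index greater than or equal to the row index:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/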
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix-vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscVerboseInfo((0,"MatCreateMPISBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPISBAIJ",
                                           MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPISBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
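/*
   Illustrative sketch of runtime type selection through MatCreate_SBAIJ()
   above (example sizes; error checking abbreviated).  Running the program
   with -mat_type sbaij makes MatSetFromOptions() invoke MatCreate_SBAIJ(),
   which then picks MATSEQSBAIJ or MATMPISBAIJ from the communicator size:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/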
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.  You must leave room
           for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
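/*
   Worked preallocation sketch for the figure in the man page above
   (illustrative; assumes bs = 1 and that the depicted rows are completely
   dense).  Since only the upper triangle is stored, the d counts for local
   rows 3, 4, 5 are the upper-triangular entries of the d block including
   the diagonal, and the o counts are the stored columns 6 through 11:

     PetscInt d_nnz[3],o_nnz[3];
     d_nnz[0] = 3; d_nnz[1] = 2; d_nnz[2] = 1;
     o_nnz[0] = 6; o_nnz[1] = 6; o_nnz[2] = 6;
     ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/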
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJ"
/*@C
   MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The number of rows and columns must be divisible by blocksize.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/

PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
    ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
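/*
   Illustrative one-call creation using the routine above (example block
   size and nonzero estimates; error checking abbreviated):

     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                              5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/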
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a       = (Mat_MPISBAIJ*)mat->data;
  mat->bs = matin->bs;
  a->bs2  = oldmat->bs2;
  a->mbs  = oldmat->mbs;
  a->nbs  = oldmat->nbs;
  a->Mbs  = oldmat->Mbs;
  a->Nbs  = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
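/*
   Usage sketch (illustrative): the static routine above is reached through
   the generic interface,

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   Note that the sMvctx scatter is shared by reference instead of copied;
   see the comment above the PetscObjectReference() call.
*/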
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscVerboseInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs  = Mbs/size + ((Mbs % size) > rank);
  m    = mbs*bs;
  ierr = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in the column indices of all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr   = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens = dlens + (rend-rstart);
  ierr   = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1 = mask + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block column index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;  /* entry in off-diagonal portion */
          else                             masked1[dcount++]  = tmp;  /* entry in diagonal portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;   /* d_nnz[i] */
    odlens[i] = odcount;  /* o_nnz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount;  j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
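/*
   Loading sketch (illustrative; the file name is an example).  The routine
   above is reached through MatLoad() on a binary viewer; the block size is
   taken from the -matload_block_size option:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/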
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/


#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPISBAIJ"
PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs  = A->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = a->rowners_bs;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;         /* local col index */
          col += rowners_bs[rank+1];  /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;       /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ"
PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       mbs=mat->mbs,bs=matin->bs;
  PetscScalar    *x,*b,*ptr,zero=0.0;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
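/*
   Invocation sketch (illustrative): the routine above implements the local
   symmetric sweep for this type, so a direct call with omega = 1.0,
   fshift = 0.0, and one global and one local iteration looks like

     ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,1,x);CHKERRQ(ierr);
*/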
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}