#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix entries in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar    MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar           MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar    MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar        MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
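/*
   What the two macros above implement, written out as a plain function for
   readability: the block row keeps its column indices rp[0..nrow-1] sorted, so
   the search narrows with a binary search and finishes with a short linear scan.
   This is a hypothetical sketch only (kept under #if 0); the encoding of the
   insertion point in the return value is not used by the macros themselves.
*/
#if 0
static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {          /* binary search down to <= 3 candidates */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {      /* final linear scan */
    if (rp[i] == bcol) return i;  /* found: index of the bs*bs block */
    if (rp[i] >  bcol) break;
  }
  return -(i+1);                  /* not found: encodes insertion point i */
}
#endif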
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
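/*
   Caller-side sketch (hypothetical names A, rows, cols, vals; kept under #if 0):
   MatSetValues() takes global indices; entries for rows owned by another process
   are stashed by the routine above and communicated during assembly.
*/
#if 0
  PetscInt    rows[1] = {0},cols[2] = {0,1};
  PetscScalar vals[2] = {1.0,2.0};
  ierr = MatSetValues(A,1,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif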
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
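/*
   Caller-side sketch for the blocked interface above (hypothetical names A, brow,
   bcol, block; bs = 2 assumed; kept under #if 0): indices are in block units and
   v holds m*n blocks of bs*bs values, row-oriented by default.
*/
#if 0
  PetscInt    brow = 0,bcol = 0;                /* block row/column indices */
  PetscScalar block[4] = {1.0,2.0,3.0,4.0};     /* one 2x2 block            */
  ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
#endif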
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
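/*
   The HASH() macro above is multiplicative hashing with the golden ratio: the
   key (global block row * Nbs + block column + 1) is multiplied by ~0.618..., and
   the fractional part is scaled to the table size; collisions are resolved by
   the linear probing seen in the routines that use it.  Hypothetical sketch of
   the same computation as a function (kept under #if 0):
*/
#if 0
static PetscInt HashKey(PetscInt size,PetscInt key)
{
  PetscReal tmp = key*0.6180339887;                /* HASH_KEY                */
  return (PetscInt)(size*(tmp - (PetscInt)tmp));   /* fractional part * size  */
}
#endif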
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
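/*
   The two _HT routines above are only installed once the hash table exists (see
   MatAssemblyEnd below).  Hypothetical user-side sketch of turning it on before
   the first assembly (kept under #if 0); the option can also be set with
   -mat_use_hash_table, and MatSetHashTableFactor() is assumed to be the public
   interface to the ht_fact sizing factor.
*/
#if 0
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
  ierr = MatSetHashTableFactor(A,1.99);CHKERRQ(ierr);  /* table size = factor * nz */
#endif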
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
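/*
   Caller-side sketch (hypothetical names A, val; kept under #if 0): only locally
   owned rows may be queried, matching the PETSC_ERR_SUP branch above, so the
   ownership range is obtained first.
*/
#if 0
  PetscInt    rstart,rend,row,col = 0;
  PetscScalar val;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart;   /* first locally owned row */
  ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
#endif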
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
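/*
   Caller-side sketch exercising the three norms handled above (hypothetical
   names A, nrm; kept under #if 0).
*/
#if 0
  PetscReal nrm;
  ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr); /* sqrt of global sum of squares */
  ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);         /* max column sum                */
  ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);  /* max row sum                   */
#endif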
/*
     Creates the hash table, and sets the table.
     This table is created only once.
     If new entries need to be added to the matrix
     then the hash table has to be destroyed and
     recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
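/*
   Assembly sketch (hypothetical name A; kept under #if 0): both phases are
   collective.  Begin() above launches the stash scatters; End() below drains
   them, so independent work can be overlapped between the two calls.
*/
#if 0
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* ... other work may be overlapped here ... */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif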
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                    rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
                    mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
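/*
   Viewer sketch (hypothetical name A; kept under #if 0): ASCII, draw, socket and
   binary viewers all route through the routine above.
*/
#if 0
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
#endif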
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
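/*
   The multiply routines above overlap communication with computation:
   VecScatterBegin() starts moving the ghost values of x while the diagonal
   block is applied, and VecScatterEnd() completes them before the off-diagonal
   product is added in.  Usage sketch (hypothetical names A, x, y; kept under #if 0):
*/
#if 0
  ierr = MatMult(A,x,y);CHKERRQ(ierr);       /* y = A x     */
  ierr = MatMultAdd(A,x,y,y);CHKERRQ(ierr);  /* y = y + A x */
#endif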
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
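/*
   Transpose-multiply sketch (hypothetical names A, x, y; kept under #if 0):
   y = A^T x reverses the scatter, so off-process contributions are accumulated
   into y with ADD_VALUES, as implemented above.
*/
#if 0
  ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);       /* y = A^T x     */
  ierr = MatMultTransposeAdd(A,x,y,y);CHKERRQ(ierr);  /* y = y + A^T x */
#endif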
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
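/*
   Sketch for the two small routines above (hypothetical names A, d, m, n; kept
   under #if 0): the diagonal lives entirely in the local block a->A (square
   matrices only), and scaling applies to both blocks.
*/
#if 0
  Vec      d;
  PetscInt m,n;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&d);CHKERRQ(ierr);
  ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
  ierr = MatScale(A,2.0);CHKERRQ(ierr);   /* A <- 2 A */
#endif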
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
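/*
   MatGetRow() sketch (hypothetical name A; kept under #if 0): rows must be
   locally owned, and every MatGetRow() must be paired with MatRestoreRow()
   before the next row is requested -- see the getrowactive guard above.
*/
#if 0
  PetscInt    row,ncols,*cols,rstart,rend;
  PetscScalar *vals;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... examine ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif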
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->rmap.N;
  info->columns_global    = (PetscReal)matin->cmap.N;
  info->rows_local        = (PetscReal)matin->rmap.n;
  info->columns_local     = (PetscReal)matin->cmap.n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
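/*
   MatGetInfo() sketch (hypothetical name A; kept under #if 0): local statistics
   come straight from the two blocks; the global variants reduce the five values
   with MPI_MAX or MPI_SUM as above.
*/
#if 0
  MatInfo info;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g, mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
#endif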
M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1598 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1599 MatScalar *a; 1600 1601 PetscFunctionBegin; 1602 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1603 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1604 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1605 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1606 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1607 1608 /* copy over the A part */ 1609 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1610 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1611 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1612 1613 for (i=0; i<mbs; i++) { 1614 rvals[0] = bs*(baij->rstartbs + i); 1615 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1616 for (j=ai[i]; j<ai[i+1]; j++) { 1617 col = (baij->cstartbs+aj[j])*bs; 1618 for (k=0; k<bs; k++) { 1619 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1620 col++; a += bs; 1621 } 1622 } 1623 } 1624 /* copy over the B part */ 1625 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1626 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1627 for (i=0; i<mbs; i++) { 1628 rvals[0] = bs*(baij->rstartbs + i); 1629 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1630 for (j=ai[i]; j<ai[i+1]; j++) { 1631 col = baij->garray[aj[j]]*bs; 1632 for (k=0; k<bs; k++) { 1633 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1634 col++; a += bs; 1635 } 1636 } 1637 } 1638 ierr = PetscFree(rvals);CHKERRQ(ierr); 1639 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 1642 if (matout) { 1643 *matout = B; 1644 } else { 1645 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1652 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1653 { 1654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1655 Mat a = baij->A,b = baij->B; 1656 PetscErrorCode ierr; 1657 PetscInt s1,s2,s3; 1658 1659 PetscFunctionBegin; 1660 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1661 if (rr) { 1662 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1663 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1664 /* Overlap communication with computation. 
*/ 1665 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1666 } 1667 if (ll) { 1668 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1669 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1670 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1671 } 1672 /* scale the diagonal block */ 1673 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1674 1675 if (rr) { 1676 /* Do a scatter end and then right scale the off-diagonal block */ 1677 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1678 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1679 } 1680 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1686 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1687 { 1688 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1691 PetscInt i,*owners = A->rmap.range; 1692 PetscInt *nprocs,j,idx,nsends,row; 1693 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1694 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1695 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1696 MPI_Comm comm = A->comm; 1697 MPI_Request *send_waits,*recv_waits; 1698 MPI_Status recv_status,*send_status; 1699 #if defined(PETSC_DEBUG) 1700 PetscTruth found = PETSC_FALSE; 1701 #endif 1702 1703 PetscFunctionBegin; 1704 /* first count number of contributors to each processor */ 1705 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1706 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1707 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1708 j = 0; 1709 for (i=0; i<N; i++) { 1710 if (lastidx > (idx = rows[i])) j = 0; 1711 lastidx = idx; 1712 for (; j<size; j++) { 1713 if (idx >= owners[j] && idx < owners[j+1]) { 1714 nprocs[2*j]++; 1715 nprocs[2*j+1] = 1; 1716 owner[i] = j; 1717 #if defined(PETSC_DEBUG) 1718 found = PETSC_TRUE; 1719 #endif 1720 break; 1721 } 1722 } 1723 #if defined(PETSC_DEBUG) 1724 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1725 found = PETSC_FALSE; 1726 #endif 1727 } 1728 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1729 1730 /* inform other processors of number of messages and max length*/ 1731 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1732 1733 /* post receives: */ 1734 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1735 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1736 for (i=0; i<nrecvs; i++) { 1737 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1738 } 1739 1740 /* do sends: 1741 1) starts[i] gives the starting index in svalues for stuff going to 1742 the ith processor 1743 */ 1744 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1745 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1746 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1747 starts[0] = 0; 1748 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1749 for (i=0; i<N; i++) { 1750 svalues[starts[owner[i]]++] = rows[i]; 1751 } 1752 1753 starts[0] = 0; 1754 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1755 count = 0; 1756 for (i=0; i<size; i++) { 1757 if 
(nprocs[2*i+1]) { 1758 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1759 } 1760 } 1761 ierr = PetscFree(starts);CHKERRQ(ierr); 1762 1763 base = owners[rank]; 1764 1765 /* wait on receives */ 1766 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1767 source = lens + nrecvs; 1768 count = nrecvs; slen = 0; 1769 while (count) { 1770 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1771 /* unpack receives into our local space */ 1772 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1773 source[imdex] = recv_status.MPI_SOURCE; 1774 lens[imdex] = n; 1775 slen += n; 1776 count--; 1777 } 1778 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1779 1780 /* move the received row indices into a local list */ 1781 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1782 count = 0; 1783 for (i=0; i<nrecvs; i++) { 1784 values = rvalues + i*nmax; 1785 for (j=0; j<lens[i]; j++) { 1786 lrows[count++] = values[j] - base; 1787 } 1788 } 1789 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1790 ierr = PetscFree(lens);CHKERRQ(ierr); 1791 ierr = PetscFree(owner);CHKERRQ(ierr); 1792 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1793 1794 /* actually zap the local rows */ 1795 /* 1796 Zero the required rows. If the "diagonal block" of the matrix 1797 is square and the user wishes to set the diagonal, we use separate 1798 code so that MatSetValues() is not called once per diagonal entry; that 1799 would allocate new memory each time and slow things down with repeated mallocs. 1800 1801 Contributed by: Matthew Knepley 1802 */ 1803 /* must zero l->B before l->A because the (diag) case below may put values into l->B */ 1804 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1805 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1806 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1807 } else if (diag != 0.0) { 1808 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1809 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1810 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1811 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1812 } 1813 for (i=0; i<slen; i++) { 1814 row = lrows[i] + rstart_bs; 1815 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1816 } 1817 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 } else { 1820 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1821 } 1822 1823 ierr = PetscFree(lrows);CHKERRQ(ierr); 1824 1825 /* wait on sends */ 1826 if (nsends) { 1827 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1828 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1829 ierr = PetscFree(send_status);CHKERRQ(ierr); 1830 } 1831 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1832 ierr = PetscFree(svalues);CHKERRQ(ierr); 1833 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1839 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1840 { 1841 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatEqual_MPIBAIJ" 1853 PetscErrorCode
MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1854 { 1855 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1856 Mat a,b,c,d; 1857 PetscTruth flg; 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 a = matA->A; b = matA->B; 1862 c = matB->A; d = matB->B; 1863 1864 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1865 if (flg) { 1866 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1867 } 1868 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatCopy_MPIBAIJ" 1874 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1875 { 1876 PetscErrorCode ierr; 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1878 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1879 1880 PetscFunctionBegin; 1881 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1882 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1883 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1884 } else { 1885 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1886 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1887 } 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1893 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1894 { 1895 PetscErrorCode ierr; 1896 1897 PetscFunctionBegin; 1898 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #include "petscblaslapack.h" 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1905 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1906 { 1907 PetscErrorCode ierr; 1908 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1909 PetscBLASInt bnz,one=1; 1910 Mat_SeqBAIJ *x,*y; 1911 1912 PetscFunctionBegin; 1913 if (str == SAME_NONZERO_PATTERN) { 1914 PetscScalar alpha = a; 1915 x = (Mat_SeqBAIJ *)xx->A->data; 1916 y = (Mat_SeqBAIJ *)yy->A->data; 1917 bnz = (PetscBLASInt)x->nz; 1918 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1919 x = (Mat_SeqBAIJ *)xx->B->data; 1920 y = (Mat_SeqBAIJ *)yy->B->data; 1921 bnz = (PetscBLASInt)x->nz; 1922 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1923 } else { 1924 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1931 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1932 { 1933 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1938 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1944 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1945 { 1946 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1947 PetscErrorCode ierr; 1948 1949 PetscFunctionBegin; 1950 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1951 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 /* -------------------------------------------------------------------*/ 1956 static struct _MatOps MatOps_Values = { 1957 MatSetValues_MPIBAIJ, 1958 MatGetRow_MPIBAIJ, 1959 MatRestoreRow_MPIBAIJ, 1960 MatMult_MPIBAIJ, 1961 /* 4*/ MatMultAdd_MPIBAIJ, 1962 MatMultTranspose_MPIBAIJ, 1963 MatMultTransposeAdd_MPIBAIJ, 1964 0, 1965 0, 1966 0, 1967 /*10*/ 0, 1968 0, 1969 0, 1970 0, 1971 MatTranspose_MPIBAIJ, 
1972 /*15*/ MatGetInfo_MPIBAIJ, 1973 MatEqual_MPIBAIJ, 1974 MatGetDiagonal_MPIBAIJ, 1975 MatDiagonalScale_MPIBAIJ, 1976 MatNorm_MPIBAIJ, 1977 /*20*/ MatAssemblyBegin_MPIBAIJ, 1978 MatAssemblyEnd_MPIBAIJ, 1979 0, 1980 MatSetOption_MPIBAIJ, 1981 MatZeroEntries_MPIBAIJ, 1982 /*25*/ MatZeroRows_MPIBAIJ, 1983 0, 1984 0, 1985 0, 1986 0, 1987 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1988 0, 1989 0, 1990 0, 1991 0, 1992 /*35*/ MatDuplicate_MPIBAIJ, 1993 0, 1994 0, 1995 0, 1996 0, 1997 /*40*/ MatAXPY_MPIBAIJ, 1998 MatGetSubMatrices_MPIBAIJ, 1999 MatIncreaseOverlap_MPIBAIJ, 2000 MatGetValues_MPIBAIJ, 2001 MatCopy_MPIBAIJ, 2002 /*45*/ 0, 2003 MatScale_MPIBAIJ, 2004 0, 2005 0, 2006 0, 2007 /*50*/ 0, 2008 0, 2009 0, 2010 0, 2011 0, 2012 /*55*/ 0, 2013 0, 2014 MatSetUnfactored_MPIBAIJ, 2015 0, 2016 MatSetValuesBlocked_MPIBAIJ, 2017 /*60*/ 0, 2018 MatDestroy_MPIBAIJ, 2019 MatView_MPIBAIJ, 2020 0, 2021 0, 2022 /*65*/ 0, 2023 0, 2024 0, 2025 0, 2026 0, 2027 /*70*/ MatGetRowMax_MPIBAIJ, 2028 0, 2029 0, 2030 0, 2031 0, 2032 /*75*/ 0, 2033 0, 2034 0, 2035 0, 2036 0, 2037 /*80*/ 0, 2038 0, 2039 0, 2040 0, 2041 MatLoad_MPIBAIJ, 2042 /*85*/ 0, 2043 0, 2044 0, 2045 0, 2046 0, 2047 /*90*/ 0, 2048 0, 2049 0, 2050 0, 2051 0, 2052 /*95*/ 0, 2053 0, 2054 0, 2055 0, 2056 0, 2057 /*100*/0, 2058 0, 2059 0, 2060 0, 2061 0, 2062 /*105*/0, 2063 MatRealPart_MPIBAIJ, 2064 MatImaginaryPart_MPIBAIJ}; 2065 2066 2067 EXTERN_C_BEGIN 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2070 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2071 { 2072 PetscFunctionBegin; 2073 *a = ((Mat_MPIBAIJ *)A->data)->A; 2074 *iscopy = PETSC_FALSE; 2075 PetscFunctionReturn(0); 2076 } 2077 EXTERN_C_END 2078 2079 EXTERN_C_BEGIN 2080 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2081 EXTERN_C_END 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2085 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 2086 { 2087 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2088 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2089 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2090 const PetscInt *JJ; 2091 PetscScalar *values; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 #if defined(PETSC_OPT_g) 2096 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]); 2097 #endif 2098 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2099 o_nnz = d_nnz + m; 2100 2101 for (i=0; i<m; i++) { 2102 nnz = Ii[i+1]- Ii[i]; 2103 JJ = J + Ii[i]; 2104 nnz_max = PetscMax(nnz_max,nnz); 2105 #if defined(PETSC_OPT_g) 2106 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz); 2107 #endif 2108 for (j=0; j<nnz; j++) { 2109 if (*JJ >= cstart) break; 2110 JJ++; 2111 } 2112 d = 0; 2113 for (; j<nnz; j++) { 2114 if (*JJ++ >= cend) break; 2115 d++; 2116 } 2117 d_nnz[i] = d; 2118 o_nnz[i] = nnz - d; 2119 } 2120 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2121 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2122 2123 if (v) values = (PetscScalar*)v; 2124 else { 2125 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2126 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2127 } 2128 2129 ierr =
MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2130 for (i=0; i<m; i++) { 2131 ii = i + rstart; 2132 nnz = Ii[i+1]- Ii[i]; 2133 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2137 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2138 2139 if (!v) { 2140 ierr = PetscFree(values);CHKERRQ(ierr); 2141 } 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2147 /*@C 2148 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format 2149 (block compressed row). 2150 2151 Collective on Mat 2152 2153 Input Parameters: 2154 + B - the matrix . bs - the block size 2155 . i - the indices into j for the start of each local row (starts with zero) 2156 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2157 - v - optional values in the matrix 2158 2159 Level: developer 2160 2161 .keywords: matrix, block, aij, compressed row, sparse, parallel 2162 2163 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ 2164 @*/ 2165 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2166 { 2167 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2168 2169 PetscFunctionBegin; 2170 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2171 if (f) { 2172 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 EXTERN_C_BEGIN 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2180 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2181 { 2182 Mat_MPIBAIJ *b; 2183 PetscErrorCode ierr; 2184 PetscInt i; 2185 2186 PetscFunctionBegin; 2187 B->preallocated = PETSC_TRUE; 2188 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2189 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2190 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2191 2192 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2193 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2194 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2195 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2196 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2197 2198 B->rmap.bs = bs; 2199 B->cmap.bs = bs; 2200 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2201 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2202 2203 if (d_nnz) { 2204 for (i=0; i<B->rmap.n/bs; i++) { 2205 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2206 } 2207 } 2208 if (o_nnz) { 2209 for (i=0; i<B->rmap.n/bs; i++) { 2210 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2211 } 2212 } 2213 2214 b =
(Mat_MPIBAIJ*)B->data; 2215 b->bs2 = bs*bs; 2216 b->mbs = B->rmap.n/bs; 2217 b->nbs = B->cmap.n/bs; 2218 b->Mbs = B->rmap.N/bs; 2219 b->Nbs = B->cmap.N/bs; 2220 2221 for (i=0; i<=b->size; i++) { 2222 b->rangebs[i] = B->rmap.range[i]/bs; 2223 } 2224 b->rstartbs = B->rmap.rstart/bs; 2225 b->rendbs = B->rmap.rend/bs; 2226 b->cstartbs = B->cmap.rstart/bs; 2227 b->cendbs = B->cmap.rend/bs; 2228 2229 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2230 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2231 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2232 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2233 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2234 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2235 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2236 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2237 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2238 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2239 2240 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2241 2242 PetscFunctionReturn(0); 2243 } 2244 EXTERN_C_END 2245 2246 EXTERN_C_BEGIN 2247 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2248 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2249 EXTERN_C_END 2250 2251 /*MC 2252 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2253 2254 Options Database Keys: 2255 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2256 . -mat_block_size <bs> - set the blocksize used to store the matrix 2257 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the factor used to size the table 2258 2259 Level: beginner 2260 2261 .seealso: MatCreateMPIBAIJ() 2262 M*/ 2263 2264 EXTERN_C_BEGIN 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatCreate_MPIBAIJ" 2267 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2268 { 2269 Mat_MPIBAIJ *b; 2270 PetscErrorCode ierr; 2271 PetscTruth flg; 2272 2273 PetscFunctionBegin; 2274 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2275 B->data = (void*)b; 2276 2277 2278 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2279 B->mapping = 0; 2280 B->factor = 0; 2281 B->assembled = PETSC_FALSE; 2282 2283 B->insertmode = NOT_SET_VALUES; 2284 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2285 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2286 2287 /* build local table of row and column ownerships */ 2288 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2289 2290 /* build cache for off-processor entries formed during assembly */ 2291 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2292 b->donotstash = PETSC_FALSE; 2293 b->colmap = PETSC_NULL; 2294 b->garray = PETSC_NULL; 2295 b->roworiented = PETSC_TRUE; 2296 2297 #if defined(PETSC_USE_MAT_SINGLE) 2298 /* stuff for MatSetValues_XXX in single precision */ 2299 b->setvalueslen = 0; 2300 b->setvaluescopy = PETSC_NULL; 2301 #endif 2302 2303 /* stuff used in block assembly */ 2304 b->barray = 0; 2305 2306 /* stuff used for matrix vector multiply */ 2307 b->lvec = 0; 2308 b->Mvctx = 0; 2309 2310 /* stuff for MatGetRow() */ 2311 b->rowindices = 0; 2312 b->rowvalues = 0; 2313 b->getrowactive = PETSC_FALSE; 2314 2315 /* hash table stuff */ 2316 b->ht = 0; 2317 b->hd = 0; 2318 b->ht_size = 0; 2319 b->ht_flag = PETSC_FALSE; 2320 b->ht_fact = 0; 2321 b->ht_total_ct = 0; 2322 b->ht_insert_ct = 0; 2323
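  /* note added in editing: when -mat_use_hash_table is enabled below, assembly
     switches MatSetValues() over to the hash-table based insertion path (the
     MatSetValues_MPIBAIJ_HT family declared at the top of this file); ht_fact,
     settable with MatMPIBAIJSetHashTableFactor(), scales the table size */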
2324 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2325 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2326 if (flg) { 2327 PetscReal fact = 1.39; 2328 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2329 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2330 if (fact <= 1.0) fact = 1.39; 2331 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2332 ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr); 2333 } 2334 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2335 2336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2337 "MatStoreValues_MPIBAIJ", 2338 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2340 "MatRetrieveValues_MPIBAIJ", 2341 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2343 "MatGetDiagonalBlock_MPIBAIJ", 2344 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2346 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2347 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2349 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2350 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2352 "MatDiagonalScaleLocal_MPIBAIJ", 2353 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2355 "MatSetHashTableFactor_MPIBAIJ", 2356 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 EXTERN_C_END 2360 2361 /*MC 2362 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2363 2364 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2365 and MATMPIBAIJ otherwise. 2366 2367 Options Database Keys: 2368 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2369 2370 Level: beginner 2371 2372 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2373 M*/ 2374 2375 EXTERN_C_BEGIN 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatCreate_BAIJ" 2378 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2379 { 2380 PetscErrorCode ierr; 2381 PetscMPIInt size; 2382 2383 PetscFunctionBegin; 2384 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2385 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2386 if (size == 1) { 2387 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2388 } else { 2389 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 EXTERN_C_END 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2397 /*@C 2398 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2399 (block compressed row). For good matrix assembly performance 2400 the user should preallocate the matrix storage by setting the parameters 2401 d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 2402 performance can be increased by more than a factor of 50. 2403 2404 Collective on Mat 2405 2406 Input Parameters: 2407 + A - the matrix 2408 . bs - size of block 2409 . d_nz - number of block nonzeros per block row in diagonal portion of local 2410 submatrix (same for all local rows) 2411 . d_nnz - array containing the number of block nonzeros in the various block rows 2412 in the diagonal portion of the local submatrix (possibly different for each block 2413 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2414 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2415 submatrix (same for all local rows). 2416 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2417 off-diagonal portion of the local submatrix (possibly different for 2418 each block row) or PETSC_NULL. 2419 2420 If the *_nnz parameter is given then the *_nz parameter is ignored 2421 2422 Options Database Keys: 2423 + -mat_block_size - size of the blocks to use 2424 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the factor used to size the table 2425 2426 Notes: 2427 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2428 then it must be used on all processors that share the object for that argument. 2429 2430 Storage Information: 2431 For a square global matrix we define each processor's diagonal portion 2432 to be its local rows and the corresponding columns (a square submatrix); 2433 each processor's off-diagonal portion encompasses the remainder of the 2434 local matrix (a rectangular submatrix). 2435 2436 The user can specify preallocated storage for the diagonal part of 2437 the local submatrix with either d_nz or d_nnz (not both). Set 2438 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2439 memory allocation. Likewise, specify preallocated storage for the 2440 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2441 2442 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2443 the figure below we depict these three local rows and all columns (0-11). 2444 2445 .vb 2446 0 1 2 3 4 5 6 7 8 9 10 11 2447 ------------------- 2448 row 3 | o o o d d d o o o o o o 2449 row 4 | o o o d d d o o o o o o 2450 row 5 | o o o d d d o o o o o o 2451 ------------------- 2452 .ve 2453 2454 Thus, any entries in the d locations are stored in the d (diagonal) 2455 submatrix, and any entries in the o locations are stored in the 2456 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2457 stored simply in the MATSEQBAIJ format for compressed row storage. 2458 2459 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2460 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2461 In general, for PDE problems in which most nonzeros are near the diagonal, 2462 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2463 or you will get TERRIBLE performance; see the users' manual chapter on 2464 matrices.
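
   Example usage (an illustrative sketch added in editing, not from the original
   source; comm, m, n, M, and N are assumed to be supplied by the caller and to be
   consistent multiples of the block size):

.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve

   Here bs is 2 and every block row is preallocated with room for 5 blocks in the
   diagonal portion and 2 blocks in the off-diagonal portion; pass d_nnz/o_nnz
   arrays instead of PETSC_NULL when the counts vary from block row to block row.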
2465 2466 Level: intermediate 2467 2468 .keywords: matrix, block, aij, compressed row, sparse, parallel 2469 2470 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2471 @*/ 2472 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2473 { 2474 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2475 2476 PetscFunctionBegin; 2477 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2478 if (f) { 2479 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatCreateMPIBAIJ" 2486 /*@C 2487 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2488 (block compressed row). For good matrix assembly performance 2489 the user should preallocate the matrix storage by setting the parameters 2490 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2491 performance can be increased by more than a factor of 50. 2492 2493 Collective on MPI_Comm 2494 2495 Input Parameters: 2496 + comm - MPI communicator 2497 . bs - size of block 2498 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2499 This value should be the same as the local size used in creating the 2500 y vector for the matrix-vector product y = Ax. 2501 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2502 This value should be the same as the local size used in creating the 2503 x vector for the matrix-vector product y = Ax. 2504 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2505 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2506 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2507 submatrix (same for all local rows) 2508 . d_nnz - array containing the number of nonzero blocks in the various block rows 2509 in the diagonal portion of the local submatrix (possibly different for each block 2510 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2511 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2512 submatrix (same for all local rows). 2513 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2514 off-diagonal portion of the local submatrix (possibly different for 2515 each block row) or PETSC_NULL. 2516 2517 Output Parameter: 2518 . A - the matrix 2519 2520 Options Database Keys: 2521 + -mat_block_size - size of the blocks to use 2522 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; fact is the factor used to size the table 2523 2524 Notes: 2525 If the *_nnz parameter is given then the *_nz parameter is ignored 2526 2527 A nonzero block is any block that has one or more nonzeros in it 2528 2529 The user MUST specify either the local or global matrix dimensions 2530 (possibly both). 2531 2532 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2533 then it must be used on all processors that share the object for that argument. 2534 2535 Storage Information: 2536 For a square global matrix we define each processor's diagonal portion 2537 to be its local rows and the corresponding columns (a square submatrix); 2538 each processor's off-diagonal portion encompasses the remainder of the 2539 local matrix (a rectangular submatrix).
2540 2541 The user can specify preallocated storage for the diagonal part of 2542 the local submatrix with either d_nz or d_nnz (not both). Set 2543 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2544 memory allocation. Likewise, specify preallocated storage for the 2545 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2546 2547 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2548 the figure below we depict these three local rows and all columns (0-11). 2549 2550 .vb 2551 0 1 2 3 4 5 6 7 8 9 10 11 2552 ------------------- 2553 row 3 | o o o d d d o o o o o o 2554 row 4 | o o o d d d o o o o o o 2555 row 5 | o o o d d d o o o o o o 2556 ------------------- 2557 .ve 2558 2559 Thus, any entries in the d locations are stored in the d (diagonal) 2560 submatrix, and any entries in the o locations are stored in the 2561 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2562 stored simply in the MATSEQBAIJ format for compressed row storage. 2563 2564 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2565 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2566 In general, for PDE problems in which most nonzeros are near the diagonal, 2567 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2568 or you will get TERRIBLE performance; see the users' manual chapter on 2569 matrices. 2570 2571 Level: intermediate 2572 2573 .keywords: matrix, block, aij, compressed row, sparse, parallel 2574 2575 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2576 @*/ 2577 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2578 { 2579 PetscErrorCode ierr; 2580 PetscMPIInt size; 2581 2582 PetscFunctionBegin; 2583 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2584 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2585 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586 if (size > 1) { 2587 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2588 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2589 } else { 2590 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2591 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2592 } 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2598 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2599 { 2600 Mat mat; 2601 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2602 PetscErrorCode ierr; 2603 PetscInt len=0; 2604 2605 PetscFunctionBegin; 2606 *newmat = 0; 2607 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2608 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2609 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2610 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2611 2612 mat->factor = matin->factor; 2613 mat->preallocated = PETSC_TRUE; 2614 mat->assembled = PETSC_TRUE; 2615 mat->insertmode = NOT_SET_VALUES; 2616 2617 a = (Mat_MPIBAIJ*)mat->data; 2618 mat->rmap.bs = matin->rmap.bs; 2619 a->bs2 = oldmat->bs2; 2620 a->mbs = oldmat->mbs; 2621 a->nbs = oldmat->nbs; 2622 a->Mbs = oldmat->Mbs; 2623 a->Nbs = oldmat->Nbs; 2624 2625 ierr = 
PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2626 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2627 2628 a->size = oldmat->size; 2629 a->rank = oldmat->rank; 2630 a->donotstash = oldmat->donotstash; 2631 a->roworiented = oldmat->roworiented; 2632 a->rowindices = 0; 2633 a->rowvalues = 0; 2634 a->getrowactive = PETSC_FALSE; 2635 a->barray = 0; 2636 a->rstartbs = oldmat->rstartbs; 2637 a->rendbs = oldmat->rendbs; 2638 a->cstartbs = oldmat->cstartbs; 2639 a->cendbs = oldmat->cendbs; 2640 2641 /* hash table stuff */ 2642 a->ht = 0; 2643 a->hd = 0; 2644 a->ht_size = 0; 2645 a->ht_flag = oldmat->ht_flag; 2646 a->ht_fact = oldmat->ht_fact; 2647 a->ht_total_ct = 0; 2648 a->ht_insert_ct = 0; 2649 2650 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2651 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2652 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2653 if (oldmat->colmap) { 2654 #if defined (PETSC_USE_CTABLE) 2655 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2656 #else 2657 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2658 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2659 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2660 #endif 2661 } else a->colmap = 0; 2662 2663 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2664 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2665 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2666 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2667 } else a->garray = 0; 2668 2669 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2670 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2671 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2672 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2673 2674 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2675 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2676 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2678 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2679 *newmat = mat; 2680 2681 PetscFunctionReturn(0); 2682 } 2683 2684 #include "petscsys.h" 2685 2686 #undef __FUNCT__ 2687 #define __FUNCT__ "MatLoad_MPIBAIJ" 2688 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2689 { 2690 Mat A; 2691 PetscErrorCode ierr; 2692 int fd; 2693 PetscInt i,nz,j,rstart,rend; 2694 PetscScalar *vals,*buf; 2695 MPI_Comm comm = ((PetscObject)viewer)->comm; 2696 MPI_Status status; 2697 PetscMPIInt rank,size,maxnz; 2698 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2699 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2700 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2701 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2702 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2703 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2704 2705 PetscFunctionBegin; 2706 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2707 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 
2708 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2709 2710 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2711 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2712 if (!rank) { 2713 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2714 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2715 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object"); 2716 } 2717 2718 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2719 M = header[1]; N = header[2]; 2720 2721 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2722 2723 /* 2724 This code adds extra rows to make sure the number of rows is 2725 divisible by the blocksize 2726 */ 2727 Mbs = M/bs; 2728 extra_rows = bs - M + bs*Mbs; 2729 if (extra_rows == bs) extra_rows = 0; 2730 else Mbs++; 2731 if (extra_rows && !rank) { 2732 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2733 } 2734 2735 /* determine ownership of all rows */ 2736 mbs = Mbs/size + ((Mbs % size) > rank); 2737 m = mbs*bs; 2738 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2739 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2740 2741 /* process 0 needs enough room for process with most rows */ 2742 if (!rank) { 2743 mmax = rowners[1]; 2744 for (i=2; i<=size; i++) { 2745 mmax = PetscMax(mmax,rowners[i]); 2746 } 2747 mmax*=bs; 2748 } else mmax = m; 2749 2750 rowners[0] = 0; 2751 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2752 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2753 rstart = rowners[rank]; 2754 rend = rowners[rank+1]; 2755 2756 /* distribute row lengths to all processors */ 2757 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2758 if (!rank) { 2759 mend = m; 2760 if (size == 1) mend = mend - extra_rows; 2761 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2762 for (j=mend; j<m; j++) locrowlens[j] = 1; 2763 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2764 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2765 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2766 for (j=0; j<m; j++) { 2767 procsnz[0] += locrowlens[j]; 2768 } 2769 for (i=1; i<size; i++) { 2770 mend = browners[i+1] - browners[i]; 2771 if (i == size-1) mend = mend - extra_rows; 2772 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2773 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2774 /* calculate the number of nonzeros on each processor */ 2775 for (j=0; j<browners[i+1]-browners[i]; j++) { 2776 procsnz[i] += rowlengths[j]; 2777 } 2778 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2779 } 2780 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2781 } else { 2782 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2783 } 2784 2785 if (!rank) { 2786 /* determine max buffer needed and allocate it */ 2787 maxnz = procsnz[0]; 2788 for (i=1; i<size; i++) { 2789 maxnz = PetscMax(maxnz,procsnz[i]); 2790 } 2791 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2792 2793 /* read in my part of the matrix column indices */ 2794 nz = procsnz[0]; 2795 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2796 mycols = ibuf; 2797 if (size == 1) nz -= extra_rows; 2798 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2799 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
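  /* note added in editing: rows appended above to pad the global size to a
     multiple of the blocksize each receive exactly one entry, in column M+i;
     the matching value of 1.0 is supplied when the numerical values are read
     further below */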
2800 2801 /* read in the column indices for everyone else (except the last proc) and ship them off */ 2802 for (i=1; i<size-1; i++) { 2803 nz = procsnz[i]; 2804 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2805 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2806 } 2807 /* read in the stuff for the last proc */ 2808 if (size != 1) { 2809 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2810 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2811 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2812 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2813 } 2814 ierr = PetscFree(cols);CHKERRQ(ierr); 2815 } else { 2816 /* determine buffer space needed for message */ 2817 nz = 0; 2818 for (i=0; i<m; i++) { 2819 nz += locrowlens[i]; 2820 } 2821 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2822 mycols = ibuf; 2823 /* receive message of column indices */ 2824 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2825 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2826 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2827 } 2828 2829 /* loop over local rows, determining number of off diagonal entries */ 2830 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2831 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2832 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2833 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2834 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2835 rowcount = 0; nzcount = 0; 2836 for (i=0; i<mbs; i++) { 2837 dcount = 0; 2838 odcount = 0; 2839 for (j=0; j<bs; j++) { 2840 kmax = locrowlens[rowcount]; 2841 for (k=0; k<kmax; k++) { 2842 tmp = mycols[nzcount++]/bs; 2843 if (!mask[tmp]) { 2844 mask[tmp] = 1; 2845 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2846 else masked1[dcount++] = tmp; 2847 } 2848 } 2849 rowcount++; 2850 } 2851 2852 dlens[i] = dcount; 2853 odlens[i] = odcount; 2854 2855 /* zero out the mask elements we set */ 2856 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2857 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2858 } 2859 2860 /* create our matrix */ 2861 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2862 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2863 ierr = MatSetType(A,type);CHKERRQ(ierr); 2864 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2865 2866 /* the file stores each row's column indices in sorted order, so let the matrix exploit that during insertion */ 2867 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2868 2869 if (!rank) { 2870 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2871 /* read in my part of the matrix numerical values */ 2872 nz = procsnz[0]; 2873 vals = buf; 2874 mycols = ibuf; 2875 if (size == 1) nz -= extra_rows; 2876 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2877 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2878 2879 /* insert into matrix */ 2880 jj = rstart*bs; 2881 for (i=0; i<m; i++) { 2882 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2883 mycols += locrowlens[i]; 2884 vals += locrowlens[i]; 2885 jj++; 2886 } 2887 /* read in other processors (except the last one) and ship out */ 2888 for (i=1; i<size-1; i++) { 2889 nz = procsnz[i]; 2890 vals = buf; 2891 ierr =
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2892 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2893 } 2894 /* the last proc */ 2895 if (size != 1) { 2896 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2897 vals = buf; 2898 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2899 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2900 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2901 } 2902 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2903 } else { 2904 /* receive numeric values */ 2905 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2906 2907 /* receive message of values */ 2908 vals = buf; 2909 mycols = ibuf; 2910 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2911 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2912 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2913 2914 /* insert into matrix */ 2915 jj = rstart*bs; 2916 for (i=0; i<m; i++) { 2917 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2918 mycols += locrowlens[i]; 2919 vals += locrowlens[i]; 2920 jj++; 2921 } 2922 } 2923 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2924 ierr = PetscFree(buf);CHKERRQ(ierr); 2925 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2926 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2927 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2928 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2929 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2930 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2931 2932 *newmat = A; 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2938 /*@ 2939 MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table. 2940 2941 Collective on Mat 2942 2943 Input Parameters: 2944 + mat - the matrix 2945 - fact - the hash table factor 2946 2947 Level: advanced 2948 2949 Notes: 2950 This can also be set by the command line option: -mat_use_hash_table <fact> 2951 2952 .keywords: matrix, hashtable, factor, HT 2953 2954 .seealso: MatSetOption() 2955 @*/ 2956 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2957 { 2958 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2959 2960 PetscFunctionBegin; 2961 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2962 if (f) { 2963 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2964 } 2965 PetscFunctionReturn(0); 2966 } 2967 2968 EXTERN_C_BEGIN 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2971 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2972 { 2973 Mat_MPIBAIJ *baij; 2974 2975 PetscFunctionBegin; 2976 baij = (Mat_MPIBAIJ*)mat->data; 2977 baij->ht_fact = fact; 2978 PetscFunctionReturn(0); 2979 } 2980 EXTERN_C_END 2981 2982 #undef __FUNCT__ 2983 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2984 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2985 { 2986 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2987 PetscFunctionBegin; 2988 *Ad = a->A; 2989 *Ao = a->B; 2990 *colmap = a->garray; 2991 PetscFunctionReturn(0); 2992 } 2993
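/*
   Usage sketch for MatMPIBAIJGetSeqBAIJ() (an illustrative note added in editing,
   not part of the original source). Ad and Ao receive the local diagonal and
   off-diagonal SeqBAIJ blocks, and block column j of Ao corresponds to global
   block column colmap[j]:

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/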