1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->rmap.n; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 169 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 193 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 194 CHKMEMQ;\ 195 N = nrow++ - 1; \ 196 /* shift up all the later entries in this row */ \ 197 for (ii=N; ii>=_i; ii--) { \ 198 rp[ii+1] = rp[ii]; \ 199 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 200 } \ 201 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 202 rp[_i] = bcol; \ 203 ap[bs2*_i + bs*cidx + ridx] = value; \ 204 b_noinsert:; \ 205 bilen[brow] = nrow; \ 206 } 207 208 #if defined(PETSC_USE_MAT_SINGLE) 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatSetValues_MPIBAIJ" 211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 212 { 213 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 214 PetscErrorCode ierr; 215 PetscInt i,N = m*n; 216 MatScalar *vsingle; 217 218 PetscFunctionBegin; 219 if (N > b->setvalueslen) { 220 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 221 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 222 b->setvalueslen = N; 223 } 224 vsingle = b->setvaluescopy; 225 226 for (i=0; i<N; i++) { 227 vsingle[i] = v[i]; 228 } 229 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 238 PetscErrorCode ierr; 239 PetscInt i,N = m*n*b->bs2; 240 MatScalar *vsingle; 241 242 PetscFunctionBegin; 243 if (N > b->setvalueslen) { 244 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 245 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 246 b->setvalueslen = N; 247 } 248 vsingle = b->setvaluescopy; 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 PetscErrorCode ierr; 262 PetscInt i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 268 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 for (i=0; i<N; i++) { 273 vsingle[i] = v[i]; 274 } 275 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 282 { 283 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284 PetscErrorCode ierr; 285 PetscInt i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 291 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 #endif 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 306 { 307 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 308 MatScalar value; 309 PetscTruth roworiented = baij->roworiented; 310 PetscErrorCode ierr; 311 PetscInt i,j,row,col; 312 PetscInt rstart_orig=mat->rmap.rstart; 313 PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart; 314 PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs; 315 316 /* Some Variables required in the macro */ 317 Mat A = baij->A; 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 319 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 320 MatScalar *aa=a->a; 321 322 Mat B = baij->B; 323 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 324 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 325 MatScalar *ba=b->a; 326 327 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 328 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 329 MatScalar *ap,*bap; 330 331 PetscFunctionBegin; 332 for (i=0; i<m; i++) { 333 if (im[i] < 0) continue; 334 #if defined(PETSC_USE_DEBUG) 335 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 336 #endif 337 if (im[i] >= rstart_orig && im[i] < rend_orig) { 338 row = im[i] - rstart_orig; 339 for (j=0; j<n; j++) { 340 if (in[j] >= cstart_orig && in[j] < cend_orig){ 341 col = in[j] - cstart_orig; 342 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 343 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 344 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 345 } else if (in[j] < 0) continue; 346 #if defined(PETSC_USE_DEBUG) 347 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);} 348 #endif 349 else { 350 if (mat->was_assembled) { 351 if (!baij->colmap) { 352 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 353 } 354 #if defined (PETSC_USE_CTABLE) 355 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 356 col = col - 1; 357 #else 358 col = baij->colmap[in[j]/bs] - 1; 359 #endif 360 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 361 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 362 col = in[j]; 363 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 364 B = baij->B; 365 b = (Mat_SeqBAIJ*)(B)->data; 366 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 367 ba=b->a; 368 } else col += in[j]%bs; 369 } else col = in[j]; 370 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 371 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 372 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 373 } 374 } 375 } else { 376 if (!baij->donotstash) { 377 if (roworiented) { 378 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379 } else { 380 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 381 } 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393 const MatScalar *value; 394 MatScalar *barray=baij->barray; 395 PetscTruth roworiented = baij->roworiented; 396 PetscErrorCode ierr; 397 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 398 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 399 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 400 401 PetscFunctionBegin; 402 if(!barray) { 403 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 404 baij->barray = barray; 405 } 406 407 if (roworiented) { 408 stepval = (n-1)*bs; 409 } else { 410 stepval = (m-1)*bs; 411 } 412 for (i=0; i<m; i++) { 413 if (im[i] < 0) continue; 414 #if defined(PETSC_USE_DEBUG) 415 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416 #endif 417 if (im[i] >= rstart && im[i] < rend) { 418 row = im[i] - rstart; 419 for (j=0; j<n; j++) { 420 /* If NumCol = 1 then a copy is not required */ 421 if ((roworiented) && (n == 1)) { 422 barray = (MatScalar*)v + i*bs2; 423 } else if((!roworiented) && (m == 1)) { 424 barray = (MatScalar*)v + j*bs2; 425 } else { /* Here a copy is required */ 426 if (roworiented) { 427 value = v + i*(stepval+bs)*bs + j*bs; 428 } else { 429 value = v + j*(stepval+bs)*bs + i*bs; 430 } 431 for (ii=0; ii<bs; ii++,value+=stepval) { 432 for (jj=0; jj<bs; jj++) { 433 *barray++ = *value++; 434 } 435 } 436 barray -=bs2; 437 } 438 439 if (in[j] >= cstart && in[j] < cend){ 440 col = in[j] - cstart; 441 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442 } 443 else if (in[j] < 0) continue; 444 #if defined(PETSC_USE_DEBUG) 445 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446 #endif 447 else { 448 if (mat->was_assembled) { 449 if (!baij->colmap) { 450 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451 } 452 453 #if defined(PETSC_USE_DEBUG) 454 #if defined (PETSC_USE_CTABLE) 455 { PetscInt data; 456 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 457 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458 } 459 #else 460 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 #endif 462 #endif 463 #if defined (PETSC_USE_CTABLE) 464 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465 col = (col - 1)/bs; 466 #else 467 col = (baij->colmap[in[j]] - 1)/bs; 468 #endif 469 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471 col = in[j]; 472 } 473 } 474 else col = in[j]; 475 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476 } 477 } 478 } else { 479 if (!baij->donotstash) { 480 if (roworiented) { 481 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482 } else { 483 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 PetscFunctionReturn(0); 489 } 490 491 #define HASH_KEY 0.6180339887 492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 498 { 499 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500 PetscTruth roworiented = baij->roworiented; 501 PetscErrorCode ierr; 502 PetscInt i,j,row,col; 503 PetscInt rstart_orig=mat->rmap.rstart; 504 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 505 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 506 PetscReal tmp; 507 MatScalar **HD = baij->hd,value; 508 #if defined(PETSC_USE_DEBUG) 509 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 510 #endif 511 512 PetscFunctionBegin; 513 514 for (i=0; i<m; i++) { 515 #if defined(PETSC_USE_DEBUG) 516 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 517 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 518 #endif 519 row = im[i]; 520 if (row >= rstart_orig && row < rend_orig) { 521 for (j=0; j<n; j++) { 522 col = in[j]; 523 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524 /* Look up PetscInto the Hash Table */ 525 key = (row/bs)*Nbs+(col/bs)+1; 526 h1 = HASH(size,key,tmp); 527 528 529 idx = h1; 530 #if defined(PETSC_USE_DEBUG) 531 insert_ct++; 532 total_ct++; 533 if (HT[idx] != key) { 534 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535 if (idx == size) { 536 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537 if (idx == h1) { 538 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539 } 540 } 541 } 542 #else 543 if (HT[idx] != key) { 544 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545 if (idx == size) { 546 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547 if (idx == h1) { 548 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549 } 550 } 551 } 552 #endif 553 /* A HASH table entry is found, so insert the values at the correct address */ 554 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 556 } 557 } else { 558 if (!baij->donotstash) { 559 if (roworiented) { 560 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561 } else { 562 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 #if defined(PETSC_USE_DEBUG) 568 baij->ht_total_ct = total_ct; 569 baij->ht_insert_ct = insert_ct; 570 #endif 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 577 { 578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579 PetscTruth roworiented = baij->roworiented; 580 PetscErrorCode ierr; 581 PetscInt i,j,ii,jj,row,col; 582 PetscInt rstart=baij->rstartbs; 583 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 584 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585 PetscReal tmp; 586 MatScalar **HD = baij->hd,*baij_a; 587 const MatScalar *v_t,*value; 588 #if defined(PETSC_USE_DEBUG) 589 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 590 #endif 591 592 PetscFunctionBegin; 593 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (i=0; i<m; i++) { 600 #if defined(PETSC_USE_DEBUG) 601 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 602 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 603 #endif 604 row = im[i]; 605 v_t = v + i*bs2; 606 if (row >= rstart && row < rend) { 607 for (j=0; j<n; j++) { 608 col = in[j]; 609 610 /* Look up into the Hash Table */ 611 key = row*Nbs+col+1; 612 h1 = HASH(size,key,tmp); 613 614 idx = h1; 615 #if defined(PETSC_USE_DEBUG) 616 total_ct++; 617 insert_ct++; 618 if (HT[idx] != key) { 619 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620 if (idx == size) { 621 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622 if (idx == h1) { 623 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624 } 625 } 626 } 627 #else 628 if (HT[idx] != key) { 629 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630 if (idx == size) { 631 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632 if (idx == h1) { 633 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634 } 635 } 636 } 637 #endif 638 baij_a = HD[idx]; 639 if (roworiented) { 640 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641 /* value = v + (i*(stepval+bs)+j)*bs; */ 642 value = v_t; 643 v_t += bs; 644 if (addv == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 baij_a[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 baij_a[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 value = v + j*(stepval+bs)*bs + i*bs; 659 if (addv == ADD_VALUES) { 660 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 661 for (jj=0; jj<bs; jj++) { 662 baij_a[jj] += *value++; 663 } 664 } 665 } else { 666 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667 for (jj=0; jj<bs; jj++) { 668 baij_a[jj] = *value++; 669 } 670 } 671 } 672 } 673 } 674 } else { 675 if (!baij->donotstash) { 676 if (roworiented) { 677 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 678 } else { 679 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 680 } 681 } 682 } 683 } 684 #if defined(PETSC_USE_DEBUG) 685 baij->ht_total_ct = total_ct; 686 baij->ht_insert_ct = insert_ct; 687 #endif 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatGetValues_MPIBAIJ" 693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694 { 695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 696 PetscErrorCode ierr; 697 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 698 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 699 700 PetscFunctionBegin; 701 for (i=0; i<m; i++) { 702 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 703 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 704 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705 row = idxm[i] - bsrstart; 706 for (j=0; j<n; j++) { 707 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 708 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 709 if (idxn[j] >= bscstart && idxn[j] < bscend){ 710 col = idxn[j] - bscstart; 711 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712 } else { 713 if (!baij->colmap) { 714 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715 } 716 #if defined (PETSC_USE_CTABLE) 717 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718 data --; 719 #else 720 data = baij->colmap[idxn[j]/bs]-1; 721 #endif 722 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723 else { 724 col = data + idxn[j]%bs; 725 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726 } 727 } 728 } 729 } else { 730 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731 } 732 } 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatNorm_MPIBAIJ" 738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739 { 740 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742 PetscErrorCode ierr; 743 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 744 PetscReal sum = 0.0; 745 MatScalar *v; 746 747 PetscFunctionBegin; 748 if (baij->size == 1) { 749 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750 } else { 751 if (type == NORM_FROBENIUS) { 752 v = amat->a; 753 nz = amat->nz*bs2; 754 for (i=0; i<nz; i++) { 755 #if defined(PETSC_USE_COMPLEX) 756 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757 #else 758 sum += (*v)*(*v); v++; 759 #endif 760 } 761 v = bmat->a; 762 nz = bmat->nz*bs2; 763 for (i=0; i<nz; i++) { 764 #if defined(PETSC_USE_COMPLEX) 765 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766 #else 767 sum += (*v)*(*v); v++; 768 #endif 769 } 770 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771 *nrm = sqrt(*nrm); 772 } else if (type == NORM_1) { /* max column sum */ 773 PetscReal *tmp,*tmp2; 774 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 775 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 776 tmp2 = tmp + mat->cmap.N; 777 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 778 v = amat->a; jj = amat->j; 779 for (i=0; i<amat->nz; i++) { 780 for (j=0; j<bs; j++){ 781 col = bs*(cstart + *jj) + j; /* column index */ 782 for (row=0; row<bs; row++){ 783 tmp[col] += PetscAbsScalar(*v); v++; 784 } 785 } 786 jj++; 787 } 788 v = bmat->a; jj = bmat->j; 789 for (i=0; i<bmat->nz; i++) { 790 for (j=0; j<bs; j++){ 791 col = bs*garray[*jj] + j; 792 for (row=0; row<bs; row++){ 793 tmp[col] += PetscAbsScalar(*v); v++; 794 } 795 } 796 jj++; 797 } 798 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 799 *nrm = 0.0; 800 for (j=0; j<mat->cmap.N; j++) { 801 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 802 } 803 ierr = PetscFree(tmp);CHKERRQ(ierr); 804 } else if (type == NORM_INFINITY) { /* max row sum */ 805 PetscReal *sums; 806 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 807 sum = 0.0; 808 for (j=0; j<amat->mbs; j++) { 809 for (row=0; row<bs; row++) sums[row] = 0.0; 810 v = amat->a + bs2*amat->i[j]; 811 nz = amat->i[j+1]-amat->i[j]; 812 for (i=0; i<nz; i++) { 813 for (col=0; col<bs; col++){ 814 for (row=0; row<bs; row++){ 815 sums[row] += PetscAbsScalar(*v); v++; 816 } 817 } 818 } 819 v = bmat->a + bs2*bmat->i[j]; 820 nz = bmat->i[j+1]-bmat->i[j]; 821 for (i=0; i<nz; i++) { 822 for (col=0; col<bs; col++){ 823 for (row=0; row<bs; row++){ 824 sums[row] += PetscAbsScalar(*v); v++; 825 } 826 } 827 } 828 for (row=0; row<bs; row++){ 829 if (sums[row] > sum) sum = sums[row]; 830 } 831 } 832 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833 ierr = PetscFree(sums);CHKERRQ(ierr); 834 } else { 835 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836 } 837 } 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 PetscErrorCode ierr; 857 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 858 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859 PetscInt *HT,key; 860 MatScalar **HD; 861 PetscReal tmp; 862 #if defined(PETSC_USE_INFO) 863 PetscInt ct=0,max=0; 864 #endif 865 866 PetscFunctionBegin; 867 baij->ht_size=(PetscInt)(factor*nz); 868 size = baij->ht_size; 869 870 if (baij->ht) { 871 PetscFunctionReturn(0); 872 } 873 874 /* Allocate Memory for Hash Table */ 875 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876 baij->ht = (PetscInt*)(baij->hd + size); 877 HD = baij->hd; 878 HT = baij->ht; 879 880 881 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 882 883 884 /* Loop Over A */ 885 for (i=0; i<a->mbs; i++) { 886 for (j=ai[i]; j<ai[i+1]; j++) { 887 row = i+rstart; 888 col = aj[j]+cstart; 889 890 key = row*Nbs + col + 1; 891 h1 = HASH(size,key,tmp); 892 for (k=0; k<size; k++){ 893 if (!HT[(h1+k)%size]) { 894 HT[(h1+k)%size] = key; 895 HD[(h1+k)%size] = a->a + j*bs2; 896 break; 897 #if defined(PETSC_USE_INFO) 898 } else { 899 ct++; 900 #endif 901 } 902 } 903 #if defined(PETSC_USE_INFO) 904 if (k> max) max = k; 905 #endif 906 } 907 } 908 /* Loop Over B */ 909 for (i=0; i<b->mbs; i++) { 910 for (j=bi[i]; j<bi[i+1]; j++) { 911 row = i+rstart; 912 col = garray[bj[j]]; 913 key = row*Nbs + col + 1; 914 h1 = HASH(size,key,tmp); 915 for (k=0; k<size; k++){ 916 if (!HT[(h1+k)%size]) { 917 HT[(h1+k)%size] = key; 918 HD[(h1+k)%size] = b->a + j*bs2; 919 break; 920 #if defined(PETSC_USE_INFO) 921 } else { 922 ct++; 923 #endif 924 } 925 } 926 #if defined(PETSC_USE_INFO) 927 if (k> max) max = k; 928 #endif 929 } 930 } 931 932 /* Print Summary */ 933 #if defined(PETSC_USE_INFO) 934 for (i=0,j=0; i<size; i++) { 935 if (HT[i]) {j++;} 936 } 937 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 PetscErrorCode ierr; 948 PetscInt nstash,reallocs; 949 InsertMode addv; 950 951 PetscFunctionBegin; 952 if (baij->donotstash) { 953 PetscFunctionReturn(0); 954 } 955 956 /* make sure all processors are either in INSERTMODE or ADDMODE */ 957 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958 if (addv == (ADD_VALUES|INSERT_VALUES)) { 959 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960 } 961 mat->insertmode = addv; /* in case this processor had no cache */ 962 963 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 964 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 965 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 966 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 967 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 968 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975 { 976 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 978 PetscErrorCode ierr; 979 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980 PetscInt *row,*col,other_disassembled; 981 PetscTruth r1,r2,r3; 982 MatScalar *val; 983 InsertMode addv = mat->insertmode; 984 PetscMPIInt n; 985 986 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 987 PetscFunctionBegin; 988 if (!baij->donotstash) { 989 while (1) { 990 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 991 if (!flg) break; 992 993 for (i=0; i<n;) { 994 /* Now identify the consecutive vals belonging to the same row */ 995 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 996 if (j < n) ncols = j-i; 997 else ncols = n-i; 998 /* Now assemble all these values with a single function call */ 999 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1000 i = j; 1001 } 1002 } 1003 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1004 /* Now process the block-stash. Since the values are stashed column-oriented, 1005 set the roworiented flag to column oriented, and after MatSetValues() 1006 restore the original flags */ 1007 r1 = baij->roworiented; 1008 r2 = a->roworiented; 1009 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 1010 baij->roworiented = PETSC_FALSE; 1011 a->roworiented = PETSC_FALSE; 1012 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 1013 while (1) { 1014 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1015 if (!flg) break; 1016 1017 for (i=0; i<n;) { 1018 /* Now identify the consecutive vals belonging to the same row */ 1019 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1020 if (j < n) ncols = j-i; 1021 else ncols = n-i; 1022 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1023 i = j; 1024 } 1025 } 1026 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1027 baij->roworiented = r1; 1028 a->roworiented = r2; 1029 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 1030 } 1031 1032 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1033 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1034 1035 /* determine if any processor has disassembled, if so we must 1036 also disassemble ourselfs, in order that we may reassemble. */ 1037 /* 1038 if nonzero structure of submatrix B cannot change then we know that 1039 no processor disassembled thus we can skip this stuff 1040 */ 1041 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1042 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1043 if (mat->was_assembled && !other_disassembled) { 1044 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1045 } 1046 } 1047 1048 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1049 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1050 } 1051 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 1052 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1053 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1054 1055 #if defined(PETSC_USE_INFO) 1056 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1057 ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 1058 baij->ht_total_ct = 0; 1059 baij->ht_insert_ct = 0; 1060 } 1061 #endif 1062 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1063 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1064 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1065 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1066 } 1067 1068 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069 baij->rowvalues = 0; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1075 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1076 { 1077 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1078 PetscErrorCode ierr; 1079 PetscMPIInt size = baij->size,rank = baij->rank; 1080 PetscInt bs = mat->rmap.bs; 1081 PetscTruth iascii,isdraw; 1082 PetscViewer sviewer; 1083 PetscViewerFormat format; 1084 1085 PetscFunctionBegin; 1086 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1087 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1088 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1089 if (iascii) { 1090 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1091 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1092 MatInfo info; 1093 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1094 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1096 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1097 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1098 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1099 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1100 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1103 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1106 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1109 PetscFunctionReturn(0); 1110 } 1111 } 1112 1113 if (isdraw) { 1114 PetscDraw draw; 1115 PetscTruth isnull; 1116 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1117 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1118 } 1119 1120 if (size == 1) { 1121 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1122 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1123 } else { 1124 /* assemble the entire matrix onto first processor. */ 1125 Mat A; 1126 Mat_SeqBAIJ *Aloc; 1127 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1128 MatScalar *a; 1129 1130 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1131 /* Perhaps this should be the type of mat? */ 1132 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1133 if (!rank) { 1134 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1135 } else { 1136 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1137 } 1138 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1139 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1140 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1141 1142 /* copy over the A part */ 1143 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1144 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1145 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1146 1147 for (i=0; i<mbs; i++) { 1148 rvals[0] = bs*(baij->rstartbs + i); 1149 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1150 for (j=ai[i]; j<ai[i+1]; j++) { 1151 col = (baij->cstartbs+aj[j])*bs; 1152 for (k=0; k<bs; k++) { 1153 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1154 col++; a += bs; 1155 } 1156 } 1157 } 1158 /* copy over the B part */ 1159 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1160 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1161 for (i=0; i<mbs; i++) { 1162 rvals[0] = bs*(baij->rstartbs + i); 1163 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1164 for (j=ai[i]; j<ai[i+1]; j++) { 1165 col = baij->garray[aj[j]]*bs; 1166 for (k=0; k<bs; k++) { 1167 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1168 col++; a += bs; 1169 } 1170 } 1171 } 1172 ierr = PetscFree(rvals);CHKERRQ(ierr); 1173 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1175 /* 1176 Everyone has to call to draw the matrix since the graphics waits are 1177 synchronized across all processors that share the PetscDraw object 1178 */ 1179 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180 if (!rank) { 1181 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1182 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1185 ierr = MatDestroy(A);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatView_MPIBAIJ" 1192 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1193 { 1194 PetscErrorCode ierr; 1195 PetscTruth iascii,isdraw,issocket,isbinary; 1196 1197 PetscFunctionBegin; 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1202 if (iascii || isdraw || issocket || isbinary) { 1203 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1204 } else { 1205 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1212 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1213 { 1214 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 #if defined(PETSC_USE_LOG) 1219 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1220 #endif 1221 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1222 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1223 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1224 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1225 #if defined (PETSC_USE_CTABLE) 1226 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1227 #else 1228 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1229 #endif 1230 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1231 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1232 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1233 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1234 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1235 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 1236 #if defined(PETSC_USE_MAT_SINGLE) 1237 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 1238 #endif 1239 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1240 ierr = PetscFree(baij);CHKERRQ(ierr); 1241 1242 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatMult_MPIBAIJ" 1254 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1257 PetscErrorCode ierr; 1258 PetscInt nt; 1259 1260 PetscFunctionBegin; 1261 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1262 if (nt != A->cmap.n) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1264 } 1265 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1266 if (nt != A->rmap.n) { 1267 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1268 } 1269 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1270 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1271 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1272 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1278 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1279 { 1280 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1285 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1286 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1287 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1293 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1294 { 1295 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1296 PetscErrorCode ierr; 1297 PetscTruth merged; 1298 1299 PetscFunctionBegin; 1300 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1301 /* do nondiagonal part */ 1302 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1303 if (!merged) { 1304 /* send it on its way */ 1305 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1306 /* do local part */ 1307 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1308 /* receive remote parts: note this assumes the values are not actually */ 1309 /* inserted in yy until the next line */ 1310 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1311 } else { 1312 /* do local part */ 1313 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1314 /* send it on its way */ 1315 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1316 /* values actually were received in the Begin() but we need to call this nop */ 1317 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1324 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1325 { 1326 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 /* do nondiagonal part */ 1331 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1332 /* send it on its way */ 1333 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1334 /* do local part */ 1335 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1336 /* receive remote parts: note this assumes the values are not actually */ 1337 /* inserted in yy until the next line, which is true for my implementation*/ 1338 /* but is not perhaps always true. */ 1339 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /* 1344 This only works correctly for square matrices where the subblock A->A is the 1345 diagonal block 1346 */ 1347 #undef __FUNCT__ 1348 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1349 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1350 { 1351 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1352 PetscErrorCode ierr; 1353 1354 PetscFunctionBegin; 1355 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1356 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatScale_MPIBAIJ" 1362 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1363 { 1364 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1365 PetscErrorCode ierr; 1366 1367 PetscFunctionBegin; 1368 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1369 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1375 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1376 { 1377 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1378 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1379 PetscErrorCode ierr; 1380 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1381 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1382 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1383 1384 PetscFunctionBegin; 1385 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1386 mat->getrowactive = PETSC_TRUE; 1387 1388 if (!mat->rowvalues && (idx || v)) { 1389 /* 1390 allocate enough space to hold information from the longest row. 1391 */ 1392 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1393 PetscInt max = 1,mbs = mat->mbs,tmp; 1394 for (i=0; i<mbs; i++) { 1395 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1396 if (max < tmp) { max = tmp; } 1397 } 1398 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1399 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1400 } 1401 1402 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1403 lrow = row - brstart; 1404 1405 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1406 if (!v) {pvA = 0; pvB = 0;} 1407 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1408 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1409 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1410 nztot = nzA + nzB; 1411 1412 cmap = mat->garray; 1413 if (v || idx) { 1414 if (nztot) { 1415 /* Sort by increasing column numbers, assuming A and B already sorted */ 1416 PetscInt imark = -1; 1417 if (v) { 1418 *v = v_p = mat->rowvalues; 1419 for (i=0; i<nzB; i++) { 1420 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1421 else break; 1422 } 1423 imark = i; 1424 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1425 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1426 } 1427 if (idx) { 1428 *idx = idx_p = mat->rowindices; 1429 if (imark > -1) { 1430 for (i=0; i<imark; i++) { 1431 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1432 } 1433 } else { 1434 for (i=0; i<nzB; i++) { 1435 if (cmap[cworkB[i]/bs] < cstart) 1436 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1437 else break; 1438 } 1439 imark = i; 1440 } 1441 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1442 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1443 } 1444 } else { 1445 if (idx) *idx = 0; 1446 if (v) *v = 0; 1447 } 1448 } 1449 *nz = nztot; 1450 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1457 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1458 { 1459 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1460 1461 PetscFunctionBegin; 1462 if (!baij->getrowactive) { 1463 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1464 } 1465 baij->getrowactive = PETSC_FALSE; 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1471 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1472 { 1473 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1474 PetscErrorCode ierr; 1475 1476 PetscFunctionBegin; 1477 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1478 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1484 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1485 { 1486 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1487 Mat A = a->A,B = a->B; 1488 PetscErrorCode ierr; 1489 PetscReal isend[5],irecv[5]; 1490 1491 PetscFunctionBegin; 1492 info->block_size = (PetscReal)matin->rmap.bs; 1493 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1494 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1495 isend[3] = info->memory; isend[4] = info->mallocs; 1496 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1497 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1498 isend[3] += info->memory; isend[4] += info->mallocs; 1499 if (flag == MAT_LOCAL) { 1500 info->nz_used = isend[0]; 1501 info->nz_allocated = isend[1]; 1502 info->nz_unneeded = isend[2]; 1503 info->memory = isend[3]; 1504 info->mallocs = isend[4]; 1505 } else if (flag == MAT_GLOBAL_MAX) { 1506 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1507 info->nz_used = irecv[0]; 1508 info->nz_allocated = irecv[1]; 1509 info->nz_unneeded = irecv[2]; 1510 info->memory = irecv[3]; 1511 info->mallocs = irecv[4]; 1512 } else if (flag == MAT_GLOBAL_SUM) { 1513 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1514 info->nz_used = irecv[0]; 1515 info->nz_allocated = irecv[1]; 1516 info->nz_unneeded = irecv[2]; 1517 info->memory = irecv[3]; 1518 info->mallocs = irecv[4]; 1519 } else { 1520 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1521 } 1522 info->rows_global = (PetscReal)A->rmap.N; 1523 info->columns_global = (PetscReal)A->cmap.N; 1524 info->rows_local = (PetscReal)A->rmap.N; 1525 info->columns_local = (PetscReal)A->cmap.N; 1526 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1527 info->fill_ratio_needed = 0; 1528 info->factor_mallocs = 0; 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1534 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1535 { 1536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 switch (op) { 1541 case MAT_NO_NEW_NONZERO_LOCATIONS: 1542 case MAT_YES_NEW_NONZERO_LOCATIONS: 1543 case MAT_COLUMNS_UNSORTED: 1544 case MAT_COLUMNS_SORTED: 1545 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1546 case MAT_KEEP_ZEROED_ROWS: 1547 case MAT_NEW_NONZERO_LOCATION_ERR: 1548 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1549 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1550 break; 1551 case MAT_ROW_ORIENTED: 1552 a->roworiented = PETSC_TRUE; 1553 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1554 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1555 break; 1556 case MAT_ROWS_SORTED: 1557 case MAT_ROWS_UNSORTED: 1558 case MAT_YES_NEW_DIAGONALS: 1559 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1560 break; 1561 case MAT_COLUMN_ORIENTED: 1562 a->roworiented = PETSC_FALSE; 1563 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1564 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1565 break; 1566 case MAT_IGNORE_OFF_PROC_ENTRIES: 1567 a->donotstash = PETSC_TRUE; 1568 break; 1569 case MAT_NO_NEW_DIAGONALS: 1570 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1571 case MAT_USE_HASH_TABLE: 1572 a->ht_flag = PETSC_TRUE; 1573 break; 1574 case MAT_SYMMETRIC: 1575 case MAT_STRUCTURALLY_SYMMETRIC: 1576 case MAT_HERMITIAN: 1577 case MAT_SYMMETRY_ETERNAL: 1578 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1579 break; 1580 case MAT_NOT_SYMMETRIC: 1581 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1582 case MAT_NOT_HERMITIAN: 1583 case MAT_NOT_SYMMETRY_ETERNAL: 1584 break; 1585 default: 1586 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1593 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1594 { 1595 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1596 Mat_SeqBAIJ *Aloc; 1597 Mat B; 1598 PetscErrorCode ierr; 1599 PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1600 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1601 MatScalar *a; 1602 1603 PetscFunctionBegin; 1604 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1605 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1606 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1607 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1608 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1609 1610 /* copy over the A part */ 1611 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1612 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1613 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1614 1615 for (i=0; i<mbs; i++) { 1616 rvals[0] = bs*(baij->rstartbs + i); 1617 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1618 for (j=ai[i]; j<ai[i+1]; j++) { 1619 col = (baij->cstartbs+aj[j])*bs; 1620 for (k=0; k<bs; k++) { 1621 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1622 col++; a += bs; 1623 } 1624 } 1625 } 1626 /* copy over the B part */ 1627 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1628 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1629 for (i=0; i<mbs; i++) { 1630 rvals[0] = bs*(baij->rstartbs + i); 1631 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1632 for (j=ai[i]; j<ai[i+1]; j++) { 1633 col = baij->garray[aj[j]]*bs; 1634 for (k=0; k<bs; k++) { 1635 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1636 col++; a += bs; 1637 } 1638 } 1639 } 1640 ierr = PetscFree(rvals);CHKERRQ(ierr); 1641 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 1644 if (matout) { 1645 *matout = B; 1646 } else { 1647 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1655 { 1656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1657 Mat a = baij->A,b = baij->B; 1658 PetscErrorCode ierr; 1659 PetscInt s1,s2,s3; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1663 if (rr) { 1664 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1665 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1666 /* Overlap communication with computation. */ 1667 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1668 } 1669 if (ll) { 1670 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1671 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1672 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1673 } 1674 /* scale the diagonal block */ 1675 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1676 1677 if (rr) { 1678 /* Do a scatter end and then right scale the off-diagonal block */ 1679 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1680 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1681 } 1682 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1689 { 1690 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1693 PetscInt i,*owners = A->rmap.range; 1694 PetscInt *nprocs,j,idx,nsends,row; 1695 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1696 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1697 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1698 MPI_Comm comm = A->comm; 1699 MPI_Request *send_waits,*recv_waits; 1700 MPI_Status recv_status,*send_status; 1701 #if defined(PETSC_DEBUG) 1702 PetscTruth found = PETSC_FALSE; 1703 #endif 1704 1705 PetscFunctionBegin; 1706 /* first count number of contributors to each processor */ 1707 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1708 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1709 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1710 j = 0; 1711 for (i=0; i<N; i++) { 1712 if (lastidx > (idx = rows[i])) j = 0; 1713 lastidx = idx; 1714 for (; j<size; j++) { 1715 if (idx >= owners[j] && idx < owners[j+1]) { 1716 nprocs[2*j]++; 1717 nprocs[2*j+1] = 1; 1718 owner[i] = j; 1719 #if defined(PETSC_DEBUG) 1720 found = PETSC_TRUE; 1721 #endif 1722 break; 1723 } 1724 } 1725 #if defined(PETSC_DEBUG) 1726 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1727 found = PETSC_FALSE; 1728 #endif 1729 } 1730 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1731 1732 /* inform other processors of number of messages and max length*/ 1733 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1734 1735 /* post receives: */ 1736 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1737 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1738 for (i=0; i<nrecvs; i++) { 1739 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1740 } 1741 1742 /* do sends: 1743 1) starts[i] gives the starting index in svalues for stuff going to 1744 the ith processor 1745 */ 1746 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1747 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1748 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1749 starts[0] = 0; 1750 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1751 for (i=0; i<N; i++) { 1752 svalues[starts[owner[i]]++] = rows[i]; 1753 } 1754 1755 starts[0] = 0; 1756 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1757 count = 0; 1758 for (i=0; i<size; i++) { 1759 if (nprocs[2*i+1]) { 1760 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1761 } 1762 } 1763 ierr = PetscFree(starts);CHKERRQ(ierr); 1764 1765 base = owners[rank]; 1766 1767 /* wait on receives */ 1768 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1769 source = lens + nrecvs; 1770 count = nrecvs; slen = 0; 1771 while (count) { 1772 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1773 /* unpack receives into our local space */ 1774 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1775 source[imdex] = recv_status.MPI_SOURCE; 1776 lens[imdex] = n; 1777 slen += n; 1778 count--; 1779 } 1780 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1781 1782 /* move the data into the send scatter */ 1783 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1784 count = 0; 1785 for (i=0; i<nrecvs; i++) { 1786 values = rvalues + i*nmax; 1787 for (j=0; j<lens[i]; j++) { 1788 lrows[count++] = values[j] - base; 1789 } 1790 } 1791 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1792 ierr = PetscFree(lens);CHKERRQ(ierr); 1793 ierr = PetscFree(owner);CHKERRQ(ierr); 1794 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1795 1796 /* actually zap the local rows */ 1797 /* 1798 Zero the required rows. If the "diagonal block" of the matrix 1799 is square and the user wishes to set the diagonal we use separate 1800 code so that MatSetValues() is not called for each diagonal allocating 1801 new memory, thus calling lots of mallocs and slowing things down. 1802 1803 Contributed by: Matthew Knepley 1804 */ 1805 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1806 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1807 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1808 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1809 } else if (diag != 0.0) { 1810 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1811 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1812 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1813 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1814 } 1815 for (i=0; i<slen; i++) { 1816 row = lrows[i] + rstart_bs; 1817 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1818 } 1819 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 } else { 1822 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1823 } 1824 1825 ierr = PetscFree(lrows);CHKERRQ(ierr); 1826 1827 /* wait on sends */ 1828 if (nsends) { 1829 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1830 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1831 ierr = PetscFree(send_status);CHKERRQ(ierr); 1832 } 1833 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1834 ierr = PetscFree(svalues);CHKERRQ(ierr); 1835 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1841 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1842 { 1843 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1844 MPI_Comm comm = A->comm; 1845 static PetscTruth called = PETSC_FALSE; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 if (!a->rank) { 1850 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1851 } 1852 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1853 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1854 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 #undef __FUNCT__ 1859 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1860 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1861 { 1862 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatEqual_MPIBAIJ" 1874 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1875 { 1876 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1877 Mat a,b,c,d; 1878 PetscTruth flg; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 a = matA->A; b = matA->B; 1883 c = matB->A; d = matB->B; 1884 1885 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1886 if (flg) { 1887 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1888 } 1889 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatCopy_MPIBAIJ" 1895 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1896 { 1897 PetscErrorCode ierr; 1898 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1899 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1900 1901 PetscFunctionBegin; 1902 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1903 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1904 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1905 } else { 1906 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1907 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1914 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1915 { 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #include "petscblaslapack.h" 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1926 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1927 { 1928 PetscErrorCode ierr; 1929 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1930 PetscBLASInt bnz,one=1; 1931 Mat_SeqBAIJ *x,*y; 1932 1933 PetscFunctionBegin; 1934 if (str == SAME_NONZERO_PATTERN) { 1935 PetscScalar alpha = a; 1936 x = (Mat_SeqBAIJ *)xx->A->data; 1937 y = (Mat_SeqBAIJ *)yy->A->data; 1938 bnz = (PetscBLASInt)x->nz; 1939 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1940 x = (Mat_SeqBAIJ *)xx->B->data; 1941 y = (Mat_SeqBAIJ *)yy->B->data; 1942 bnz = (PetscBLASInt)x->nz; 1943 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1944 } else { 1945 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1952 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1953 { 1954 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1959 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1965 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1966 { 1967 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1968 PetscErrorCode ierr; 1969 1970 PetscFunctionBegin; 1971 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1972 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* -------------------------------------------------------------------*/ 1977 static struct _MatOps MatOps_Values = { 1978 MatSetValues_MPIBAIJ, 1979 MatGetRow_MPIBAIJ, 1980 MatRestoreRow_MPIBAIJ, 1981 MatMult_MPIBAIJ, 1982 /* 4*/ MatMultAdd_MPIBAIJ, 1983 MatMultTranspose_MPIBAIJ, 1984 MatMultTransposeAdd_MPIBAIJ, 1985 0, 1986 0, 1987 0, 1988 /*10*/ 0, 1989 0, 1990 0, 1991 0, 1992 MatTranspose_MPIBAIJ, 1993 /*15*/ MatGetInfo_MPIBAIJ, 1994 MatEqual_MPIBAIJ, 1995 MatGetDiagonal_MPIBAIJ, 1996 MatDiagonalScale_MPIBAIJ, 1997 MatNorm_MPIBAIJ, 1998 /*20*/ MatAssemblyBegin_MPIBAIJ, 1999 MatAssemblyEnd_MPIBAIJ, 2000 0, 2001 MatSetOption_MPIBAIJ, 2002 MatZeroEntries_MPIBAIJ, 2003 /*25*/ MatZeroRows_MPIBAIJ, 2004 0, 2005 0, 2006 0, 2007 0, 2008 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2009 0, 2010 0, 2011 0, 2012 0, 2013 /*35*/ MatDuplicate_MPIBAIJ, 2014 0, 2015 0, 2016 0, 2017 0, 2018 /*40*/ MatAXPY_MPIBAIJ, 2019 MatGetSubMatrices_MPIBAIJ, 2020 MatIncreaseOverlap_MPIBAIJ, 2021 MatGetValues_MPIBAIJ, 2022 MatCopy_MPIBAIJ, 2023 /*45*/ MatPrintHelp_MPIBAIJ, 2024 MatScale_MPIBAIJ, 2025 0, 2026 0, 2027 0, 2028 /*50*/ 0, 2029 0, 2030 0, 2031 0, 2032 0, 2033 /*55*/ 0, 2034 0, 2035 MatSetUnfactored_MPIBAIJ, 2036 0, 2037 MatSetValuesBlocked_MPIBAIJ, 2038 /*60*/ 0, 2039 MatDestroy_MPIBAIJ, 2040 MatView_MPIBAIJ, 2041 0, 2042 0, 2043 /*65*/ 0, 2044 0, 2045 0, 2046 0, 2047 0, 2048 /*70*/ MatGetRowMax_MPIBAIJ, 2049 0, 2050 0, 2051 0, 2052 0, 2053 /*75*/ 0, 2054 0, 2055 0, 2056 0, 2057 0, 2058 /*80*/ 0, 2059 0, 2060 0, 2061 0, 2062 MatLoad_MPIBAIJ, 2063 /*85*/ 0, 2064 0, 2065 0, 2066 0, 2067 0, 2068 /*90*/ 0, 2069 0, 2070 0, 2071 0, 2072 0, 2073 /*95*/ 0, 2074 0, 2075 0, 2076 0, 2077 0, 2078 /*100*/0, 2079 0, 2080 0, 2081 0, 2082 0, 2083 /*105*/0, 2084 MatRealPart_MPIBAIJ, 2085 MatImaginaryPart_MPIBAIJ}; 2086 2087 2088 EXTERN_C_BEGIN 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2091 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2092 { 2093 PetscFunctionBegin; 2094 *a = ((Mat_MPIBAIJ *)A->data)->A; 2095 *iscopy = PETSC_FALSE; 2096 PetscFunctionReturn(0); 2097 } 2098 EXTERN_C_END 2099 2100 EXTERN_C_BEGIN 2101 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2102 EXTERN_C_END 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2106 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2107 { 2108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2109 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2110 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2111 const PetscInt *JJ; 2112 PetscScalar *values; 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 #if defined(PETSC_OPT_g) 2117 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2118 #endif 2119 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2120 o_nnz = d_nnz + m; 2121 2122 for (i=0; i<m; i++) { 2123 nnz = I[i+1]- I[i]; 2124 JJ = J + I[i]; 2125 nnz_max = PetscMax(nnz_max,nnz); 2126 #if defined(PETSC_OPT_g) 2127 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2128 #endif 2129 for (j=0; j<nnz; j++) { 2130 if (*JJ >= cstart) break; 2131 JJ++; 2132 } 2133 d = 0; 2134 for (; j<nnz; j++) { 2135 if (*JJ++ >= cend) break; 2136 d++; 2137 } 2138 d_nnz[i] = d; 2139 o_nnz[i] = nnz - d; 2140 } 2141 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2142 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2143 2144 if (v) values = (PetscScalar*)v; 2145 else { 2146 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2147 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2148 } 2149 2150 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2151 for (i=0; i<m; i++) { 2152 ii = i + rstart; 2153 nnz = I[i+1]- I[i]; 2154 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2155 } 2156 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2159 2160 if (!v) { 2161 ierr = PetscFree(values);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2168 /*@C 2169 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2170 (the default parallel PETSc format). 2171 2172 Collective on MPI_Comm 2173 2174 Input Parameters: 2175 + A - the matrix 2176 . i - the indices into j for the start of each local row (starts with zero) 2177 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2178 - v - optional values in the matrix 2179 2180 Level: developer 2181 2182 .keywords: matrix, aij, compressed row, sparse, parallel 2183 2184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2185 @*/ 2186 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2187 { 2188 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2189 2190 PetscFunctionBegin; 2191 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2192 if (f) { 2193 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2194 } 2195 PetscFunctionReturn(0); 2196 } 2197 2198 EXTERN_C_BEGIN 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2201 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2202 { 2203 Mat_MPIBAIJ *b; 2204 PetscErrorCode ierr; 2205 PetscInt i; 2206 2207 PetscFunctionBegin; 2208 B->preallocated = PETSC_TRUE; 2209 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2210 2211 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2212 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2213 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2214 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2215 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2216 2217 B->rmap.bs = bs; 2218 B->cmap.bs = bs; 2219 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2220 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2221 2222 if (d_nnz) { 2223 for (i=0; i<B->rmap.n/bs; i++) { 2224 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2225 } 2226 } 2227 if (o_nnz) { 2228 for (i=0; i<B->rmap.n/bs; i++) { 2229 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2230 } 2231 } 2232 2233 b = (Mat_MPIBAIJ*)B->data; 2234 b->bs2 = bs*bs; 2235 b->mbs = B->rmap.n/bs; 2236 b->nbs = B->cmap.n/bs; 2237 b->Mbs = B->rmap.N/bs; 2238 b->Nbs = B->cmap.N/bs; 2239 2240 for (i=0; i<=b->size; i++) { 2241 b->rangebs[i] = B->rmap.range[i]/bs; 2242 } 2243 b->rstartbs = B->rmap.rstart/bs; 2244 b->rendbs = B->rmap.rend/bs; 2245 b->cstartbs = B->cmap.rstart/bs; 2246 b->cendbs = B->cmap.rend/bs; 2247 2248 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2249 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2250 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2251 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2252 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2253 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2254 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2255 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2256 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2257 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2258 2259 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2260 2261 PetscFunctionReturn(0); 2262 } 2263 EXTERN_C_END 2264 2265 EXTERN_C_BEGIN 2266 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2267 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2268 EXTERN_C_END 2269 2270 /*MC 2271 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2272 2273 Options Database Keys: 2274 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2275 2276 Level: beginner 2277 2278 .seealso: MatCreateMPIBAIJ 2279 M*/ 2280 2281 EXTERN_C_BEGIN 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatCreate_MPIBAIJ" 2284 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2285 { 2286 Mat_MPIBAIJ *b; 2287 PetscErrorCode ierr; 2288 PetscTruth flg; 2289 2290 PetscFunctionBegin; 2291 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2292 B->data = (void*)b; 2293 2294 2295 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2296 B->mapping = 0; 2297 B->factor = 0; 2298 B->assembled = PETSC_FALSE; 2299 2300 B->insertmode = NOT_SET_VALUES; 2301 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2302 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2303 2304 /* build local table of row and column ownerships */ 2305 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2306 2307 /* build cache for off array entries formed */ 2308 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2309 b->donotstash = PETSC_FALSE; 2310 b->colmap = PETSC_NULL; 2311 b->garray = PETSC_NULL; 2312 b->roworiented = PETSC_TRUE; 2313 2314 #if defined(PETSC_USE_MAT_SINGLE) 2315 /* stuff for MatSetValues_XXX in single precision */ 2316 b->setvalueslen = 0; 2317 b->setvaluescopy = PETSC_NULL; 2318 #endif 2319 2320 /* stuff used in block assembly */ 2321 b->barray = 0; 2322 2323 /* stuff used for matrix vector multiply */ 2324 b->lvec = 0; 2325 b->Mvctx = 0; 2326 2327 /* stuff for MatGetRow() */ 2328 b->rowindices = 0; 2329 b->rowvalues = 0; 2330 b->getrowactive = PETSC_FALSE; 2331 2332 /* hash table stuff */ 2333 b->ht = 0; 2334 b->hd = 0; 2335 b->ht_size = 0; 2336 b->ht_flag = PETSC_FALSE; 2337 b->ht_fact = 0; 2338 b->ht_total_ct = 0; 2339 b->ht_insert_ct = 0; 2340 2341 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2342 if (flg) { 2343 PetscReal fact = 1.39; 2344 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2345 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2346 if (fact <= 1.0) fact = 1.39; 2347 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2348 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2349 } 2350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2351 "MatStoreValues_MPIBAIJ", 2352 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2354 "MatRetrieveValues_MPIBAIJ", 2355 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2357 "MatGetDiagonalBlock_MPIBAIJ", 2358 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2359 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2360 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2361 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2363 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2364 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2366 "MatDiagonalScaleLocal_MPIBAIJ", 2367 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2369 "MatSetHashTableFactor_MPIBAIJ", 2370 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 EXTERN_C_END 2374 2375 /*MC 2376 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2377 2378 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2379 and MATMPIBAIJ otherwise. 2380 2381 Options Database Keys: 2382 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2383 2384 Level: beginner 2385 2386 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2387 M*/ 2388 2389 EXTERN_C_BEGIN 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatCreate_BAIJ" 2392 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2393 { 2394 PetscErrorCode ierr; 2395 PetscMPIInt size; 2396 2397 PetscFunctionBegin; 2398 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2399 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2400 if (size == 1) { 2401 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2402 } else { 2403 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 EXTERN_C_END 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2411 /*@C 2412 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2413 (block compressed row). For good matrix assembly performance 2414 the user should preallocate the matrix storage by setting the parameters 2415 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2416 performance can be increased by more than a factor of 50. 2417 2418 Collective on Mat 2419 2420 Input Parameters: 2421 + A - the matrix 2422 . bs - size of blockk 2423 . d_nz - number of block nonzeros per block row in diagonal portion of local 2424 submatrix (same for all local rows) 2425 . d_nnz - array containing the number of block nonzeros in the various block rows 2426 of the in diagonal portion of the local (possibly different for each block 2427 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2428 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2429 submatrix (same for all local rows). 2430 - o_nnz - array containing the number of nonzeros in the various block rows of the 2431 off-diagonal portion of the local submatrix (possibly different for 2432 each block row) or PETSC_NULL. 2433 2434 If the *_nnz parameter is given then the *_nz parameter is ignored 2435 2436 Options Database Keys: 2437 . -mat_no_unroll - uses code that does not unroll the loops in the 2438 block calculations (much slower) 2439 . -mat_block_size - size of the blocks to use 2440 2441 Notes: 2442 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2443 than it must be used on all processors that share the object for that argument. 2444 2445 Storage Information: 2446 For a square global matrix we define each processor's diagonal portion 2447 to be its local rows and the corresponding columns (a square submatrix); 2448 each processor's off-diagonal portion encompasses the remainder of the 2449 local matrix (a rectangular submatrix). 2450 2451 The user can specify preallocated storage for the diagonal part of 2452 the local submatrix with either d_nz or d_nnz (not both). Set 2453 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2454 memory allocation. Likewise, specify preallocated storage for the 2455 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2456 2457 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2458 the figure below we depict these three local rows and all columns (0-11). 2459 2460 .vb 2461 0 1 2 3 4 5 6 7 8 9 10 11 2462 ------------------- 2463 row 3 | o o o d d d o o o o o o 2464 row 4 | o o o d d d o o o o o o 2465 row 5 | o o o d d d o o o o o o 2466 ------------------- 2467 .ve 2468 2469 Thus, any entries in the d locations are stored in the d (diagonal) 2470 submatrix, and any entries in the o locations are stored in the 2471 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2472 stored simply in the MATSEQBAIJ format for compressed row storage. 2473 2474 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2475 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2476 In general, for PDE problems in which most nonzeros are near the diagonal, 2477 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2478 or you will get TERRIBLE performance; see the users' manual chapter on 2479 matrices. 2480 2481 Level: intermediate 2482 2483 .keywords: matrix, block, aij, compressed row, sparse, parallel 2484 2485 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2486 @*/ 2487 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2488 { 2489 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2490 2491 PetscFunctionBegin; 2492 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2493 if (f) { 2494 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2495 } 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatCreateMPIBAIJ" 2501 /*@C 2502 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2503 (block compressed row). For good matrix assembly performance 2504 the user should preallocate the matrix storage by setting the parameters 2505 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2506 performance can be increased by more than a factor of 50. 2507 2508 Collective on MPI_Comm 2509 2510 Input Parameters: 2511 + comm - MPI communicator 2512 . bs - size of blockk 2513 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2514 This value should be the same as the local size used in creating the 2515 y vector for the matrix-vector product y = Ax. 2516 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2517 This value should be the same as the local size used in creating the 2518 x vector for the matrix-vector product y = Ax. 2519 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2520 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2521 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2522 submatrix (same for all local rows) 2523 . d_nnz - array containing the number of nonzero blocks in the various block rows 2524 of the in diagonal portion of the local (possibly different for each block 2525 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2526 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2527 submatrix (same for all local rows). 2528 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2529 off-diagonal portion of the local submatrix (possibly different for 2530 each block row) or PETSC_NULL. 2531 2532 Output Parameter: 2533 . A - the matrix 2534 2535 Options Database Keys: 2536 . -mat_no_unroll - uses code that does not unroll the loops in the 2537 block calculations (much slower) 2538 . -mat_block_size - size of the blocks to use 2539 2540 Notes: 2541 If the *_nnz parameter is given then the *_nz parameter is ignored 2542 2543 A nonzero block is any block that as 1 or more nonzeros in it 2544 2545 The user MUST specify either the local or global matrix dimensions 2546 (possibly both). 2547 2548 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2549 than it must be used on all processors that share the object for that argument. 2550 2551 Storage Information: 2552 For a square global matrix we define each processor's diagonal portion 2553 to be its local rows and the corresponding columns (a square submatrix); 2554 each processor's off-diagonal portion encompasses the remainder of the 2555 local matrix (a rectangular submatrix). 2556 2557 The user can specify preallocated storage for the diagonal part of 2558 the local submatrix with either d_nz or d_nnz (not both). Set 2559 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2560 memory allocation. Likewise, specify preallocated storage for the 2561 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2562 2563 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2564 the figure below we depict these three local rows and all columns (0-11). 2565 2566 .vb 2567 0 1 2 3 4 5 6 7 8 9 10 11 2568 ------------------- 2569 row 3 | o o o d d d o o o o o o 2570 row 4 | o o o d d d o o o o o o 2571 row 5 | o o o d d d o o o o o o 2572 ------------------- 2573 .ve 2574 2575 Thus, any entries in the d locations are stored in the d (diagonal) 2576 submatrix, and any entries in the o locations are stored in the 2577 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2578 stored simply in the MATSEQBAIJ format for compressed row storage. 2579 2580 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2581 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2582 In general, for PDE problems in which most nonzeros are near the diagonal, 2583 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2584 or you will get TERRIBLE performance; see the users' manual chapter on 2585 matrices. 2586 2587 Level: intermediate 2588 2589 .keywords: matrix, block, aij, compressed row, sparse, parallel 2590 2591 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2592 @*/ 2593 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2594 { 2595 PetscErrorCode ierr; 2596 PetscMPIInt size; 2597 2598 PetscFunctionBegin; 2599 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2600 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2601 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2602 if (size > 1) { 2603 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2604 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2605 } else { 2606 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2607 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2608 } 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2614 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2615 { 2616 Mat mat; 2617 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2618 PetscErrorCode ierr; 2619 PetscInt len=0; 2620 2621 PetscFunctionBegin; 2622 *newmat = 0; 2623 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2624 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2625 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2626 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2627 2628 mat->factor = matin->factor; 2629 mat->preallocated = PETSC_TRUE; 2630 mat->assembled = PETSC_TRUE; 2631 mat->insertmode = NOT_SET_VALUES; 2632 2633 a = (Mat_MPIBAIJ*)mat->data; 2634 mat->rmap.bs = matin->rmap.bs; 2635 a->bs2 = oldmat->bs2; 2636 a->mbs = oldmat->mbs; 2637 a->nbs = oldmat->nbs; 2638 a->Mbs = oldmat->Mbs; 2639 a->Nbs = oldmat->Nbs; 2640 2641 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2642 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2643 2644 a->size = oldmat->size; 2645 a->rank = oldmat->rank; 2646 a->donotstash = oldmat->donotstash; 2647 a->roworiented = oldmat->roworiented; 2648 a->rowindices = 0; 2649 a->rowvalues = 0; 2650 a->getrowactive = PETSC_FALSE; 2651 a->barray = 0; 2652 a->rstartbs = oldmat->rstartbs; 2653 a->rendbs = oldmat->rendbs; 2654 a->cstartbs = oldmat->cstartbs; 2655 a->cendbs = oldmat->cendbs; 2656 2657 /* hash table stuff */ 2658 a->ht = 0; 2659 a->hd = 0; 2660 a->ht_size = 0; 2661 a->ht_flag = oldmat->ht_flag; 2662 a->ht_fact = oldmat->ht_fact; 2663 a->ht_total_ct = 0; 2664 a->ht_insert_ct = 0; 2665 2666 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2667 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2668 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2669 if (oldmat->colmap) { 2670 #if defined (PETSC_USE_CTABLE) 2671 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2672 #else 2673 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2674 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2675 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2676 #endif 2677 } else a->colmap = 0; 2678 2679 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2680 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2681 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2682 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2683 } else a->garray = 0; 2684 2685 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2686 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2687 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2688 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2689 2690 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2691 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2692 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2693 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2694 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2695 *newmat = mat; 2696 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #include "petscsys.h" 2701 2702 #undef __FUNCT__ 2703 #define __FUNCT__ "MatLoad_MPIBAIJ" 2704 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2705 { 2706 Mat A; 2707 PetscErrorCode ierr; 2708 int fd; 2709 PetscInt i,nz,j,rstart,rend; 2710 PetscScalar *vals,*buf; 2711 MPI_Comm comm = ((PetscObject)viewer)->comm; 2712 MPI_Status status; 2713 PetscMPIInt rank,size,maxnz; 2714 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2715 PetscInt *locrowlens,*procsnz = 0,*browners; 2716 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2717 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2718 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2719 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2720 2721 PetscFunctionBegin; 2722 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2723 2724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2726 if (!rank) { 2727 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2728 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2729 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2730 } 2731 2732 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2733 M = header[1]; N = header[2]; 2734 2735 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2736 2737 /* 2738 This code adds extra rows to make sure the number of rows is 2739 divisible by the blocksize 2740 */ 2741 Mbs = M/bs; 2742 extra_rows = bs - M + bs*Mbs; 2743 if (extra_rows == bs) extra_rows = 0; 2744 else Mbs++; 2745 if (extra_rows && !rank) { 2746 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2747 } 2748 2749 /* determine ownership of all rows */ 2750 mbs = Mbs/size + ((Mbs % size) > rank); 2751 m = mbs*bs; 2752 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2753 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2754 2755 /* process 0 needs enough room for process with most rows */ 2756 if (!rank) { 2757 mmax = rowners[1]; 2758 for (i=2; i<size; i++) { 2759 mmax = PetscMax(mmax,rowners[i]); 2760 } 2761 mmax*=bs; 2762 } else mmax = m; 2763 2764 rowners[0] = 0; 2765 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2766 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2767 rstart = rowners[rank]; 2768 rend = rowners[rank+1]; 2769 2770 /* distribute row lengths to all processors */ 2771 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2772 if (!rank) { 2773 mend = m; 2774 if (size == 1) mend = mend - extra_rows; 2775 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2776 for (j=mend; j<m; j++) locrowlens[j] = 1; 2777 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2778 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2779 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2780 for (j=0; j<m; j++) { 2781 procsnz[0] += locrowlens[j]; 2782 } 2783 for (i=1; i<size; i++) { 2784 mend = browners[i+1] - browners[i]; 2785 if (i == size-1) mend = mend - extra_rows; 2786 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2787 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2788 /* calculate the number of nonzeros on each processor */ 2789 for (j=0; j<browners[i+1]-browners[i]; j++) { 2790 procsnz[i] += rowlengths[j]; 2791 } 2792 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2793 } 2794 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2795 } else { 2796 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2797 } 2798 2799 if (!rank) { 2800 /* determine max buffer needed and allocate it */ 2801 maxnz = procsnz[0]; 2802 for (i=1; i<size; i++) { 2803 maxnz = PetscMax(maxnz,procsnz[i]); 2804 } 2805 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2806 2807 /* read in my part of the matrix column indices */ 2808 nz = procsnz[0]; 2809 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2810 mycols = ibuf; 2811 if (size == 1) nz -= extra_rows; 2812 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2813 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2814 2815 /* read in every ones (except the last) and ship off */ 2816 for (i=1; i<size-1; i++) { 2817 nz = procsnz[i]; 2818 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2819 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2820 } 2821 /* read in the stuff for the last proc */ 2822 if (size != 1) { 2823 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2824 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2825 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2826 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2827 } 2828 ierr = PetscFree(cols);CHKERRQ(ierr); 2829 } else { 2830 /* determine buffer space needed for message */ 2831 nz = 0; 2832 for (i=0; i<m; i++) { 2833 nz += locrowlens[i]; 2834 } 2835 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2836 mycols = ibuf; 2837 /* receive message of column indices*/ 2838 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2839 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2840 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2841 } 2842 2843 /* loop over local rows, determining number of off diagonal entries */ 2844 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2845 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2846 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2847 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2848 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2849 rowcount = 0; nzcount = 0; 2850 for (i=0; i<mbs; i++) { 2851 dcount = 0; 2852 odcount = 0; 2853 for (j=0; j<bs; j++) { 2854 kmax = locrowlens[rowcount]; 2855 for (k=0; k<kmax; k++) { 2856 tmp = mycols[nzcount++]/bs; 2857 if (!mask[tmp]) { 2858 mask[tmp] = 1; 2859 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2860 else masked1[dcount++] = tmp; 2861 } 2862 } 2863 rowcount++; 2864 } 2865 2866 dlens[i] = dcount; 2867 odlens[i] = odcount; 2868 2869 /* zero out the mask elements we set */ 2870 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2871 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2872 } 2873 2874 /* create our matrix */ 2875 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2876 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2877 ierr = MatSetType(A,type);CHKERRQ(ierr) 2878 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2879 2880 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2881 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2882 2883 if (!rank) { 2884 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2885 /* read in my part of the matrix numerical values */ 2886 nz = procsnz[0]; 2887 vals = buf; 2888 mycols = ibuf; 2889 if (size == 1) nz -= extra_rows; 2890 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2891 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2892 2893 /* insert into matrix */ 2894 jj = rstart*bs; 2895 for (i=0; i<m; i++) { 2896 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2897 mycols += locrowlens[i]; 2898 vals += locrowlens[i]; 2899 jj++; 2900 } 2901 /* read in other processors (except the last one) and ship out */ 2902 for (i=1; i<size-1; i++) { 2903 nz = procsnz[i]; 2904 vals = buf; 2905 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2906 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2907 } 2908 /* the last proc */ 2909 if (size != 1){ 2910 nz = procsnz[i] - extra_rows; 2911 vals = buf; 2912 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2913 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2914 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2915 } 2916 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2917 } else { 2918 /* receive numeric values */ 2919 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2920 2921 /* receive message of values*/ 2922 vals = buf; 2923 mycols = ibuf; 2924 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2925 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2926 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2927 2928 /* insert into matrix */ 2929 jj = rstart*bs; 2930 for (i=0; i<m; i++) { 2931 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2932 mycols += locrowlens[i]; 2933 vals += locrowlens[i]; 2934 jj++; 2935 } 2936 } 2937 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2938 ierr = PetscFree(buf);CHKERRQ(ierr); 2939 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2940 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2941 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2942 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2943 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2944 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 2946 *newmat = A; 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2952 /*@ 2953 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2954 2955 Input Parameters: 2956 . mat - the matrix 2957 . fact - factor 2958 2959 Collective on Mat 2960 2961 Level: advanced 2962 2963 Notes: 2964 This can also be set by the command line option: -mat_use_hash_table fact 2965 2966 .keywords: matrix, hashtable, factor, HT 2967 2968 .seealso: MatSetOption() 2969 @*/ 2970 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2971 { 2972 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2973 2974 PetscFunctionBegin; 2975 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2976 if (f) { 2977 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2978 } 2979 PetscFunctionReturn(0); 2980 } 2981 2982 EXTERN_C_BEGIN 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2985 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2986 { 2987 Mat_MPIBAIJ *baij; 2988 2989 PetscFunctionBegin; 2990 baij = (Mat_MPIBAIJ*)mat->data; 2991 baij->ht_fact = fact; 2992 PetscFunctionReturn(0); 2993 } 2994 EXTERN_C_END 2995 2996 #undef __FUNCT__ 2997 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2998 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2999 { 3000 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3001 PetscFunctionBegin; 3002 *Ad = a->A; 3003 *Ao = a->B; 3004 *colmap = a->garray; 3005 PetscFunctionReturn(0); 3006 } 3007