#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I "petscmat.h" I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
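/*
   Usage sketch (an illustration only, not part of this file's interface): the two
   routines below are the MPIBAIJ back-ends for MatStoreValues()/MatRetrieveValues().
   A typical calling sequence from user code, assuming the nonzero pattern of the
   matrix has already been frozen, would look roughly like

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ... overwrite numerical values with MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   Each back-end simply forwards the request to the sequential SeqBAIJ implementation
   on the diagonal (A) and off-diagonal (B) blocks.
*/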
69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->rmap.bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 169 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; 
\ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 193 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 194 CHKMEMQ;\ 195 N = nrow++ - 1; \ 196 /* shift up all the later entries in this row */ \ 197 for (ii=N; ii>=_i; ii--) { \ 198 rp[ii+1] = rp[ii]; \ 199 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 200 } \ 201 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 202 rp[_i] = bcol; \ 203 ap[bs2*_i + bs*cidx + ridx] = value; \ 204 b_noinsert:; \ 205 bilen[brow] = nrow; \ 206 } 207 208 #if defined(PETSC_USE_MAT_SINGLE) 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatSetValues_MPIBAIJ" 211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 212 { 213 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 214 PetscErrorCode ierr; 215 PetscInt i,N = m*n; 216 MatScalar *vsingle; 217 218 PetscFunctionBegin; 219 if (N > b->setvalueslen) { 220 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 221 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 222 b->setvalueslen = N; 223 } 224 vsingle = b->setvaluescopy; 225 226 for (i=0; i<N; i++) { 227 vsingle[i] = v[i]; 228 } 229 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 238 PetscErrorCode ierr; 239 PetscInt i,N = m*n*b->bs2; 240 MatScalar *vsingle; 241 242 PetscFunctionBegin; 243 if (N > b->setvalueslen) { 244 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 245 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 246 b->setvalueslen = N; 247 } 248 vsingle = b->setvaluescopy; 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 PetscErrorCode ierr; 262 PetscInt i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 268 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 for (i=0; i<N; i++) { 273 vsingle[i] = v[i]; 274 } 275 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 
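/*
   Note on the PETSC_USE_MAT_SINGLE wrappers in this block: each one follows the same
   pattern (a descriptive sketch of the surrounding code, not new functionality).  The
   incoming PetscScalar (double precision) values are down-converted into the MatScalar
   scratch buffer cached on the matrix, which is grown only when a larger insertion
   arrives,

       if (N > b->setvalueslen) {
         ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
         ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
         b->setvalueslen = N;
       }
       for (i=0; i<N; i++) vsingle[i] = v[i];

   and the corresponding ..._MatScalar() routine then performs the actual insertion.
   The scratch buffer is released in MatDestroy_MPIBAIJ().
*/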
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr =
MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 373 } 374 } 375 } else { 376 if (!baij->donotstash) { 377 if (roworiented) { 378 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379 } else { 380 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 381 } 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393 const MatScalar *value; 394 MatScalar *barray=baij->barray; 395 PetscTruth roworiented = baij->roworiented; 396 PetscErrorCode ierr; 397 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 398 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 399 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 400 401 PetscFunctionBegin; 402 if(!barray) { 403 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 404 baij->barray = barray; 405 } 406 407 if (roworiented) { 408 stepval = (n-1)*bs; 409 } else { 410 stepval = (m-1)*bs; 411 } 412 for (i=0; i<m; i++) { 413 if (im[i] < 0) continue; 414 #if defined(PETSC_USE_DEBUG) 415 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416 #endif 417 if (im[i] >= rstart && im[i] < rend) { 418 row = im[i] - rstart; 419 for (j=0; j<n; j++) { 420 /* If NumCol = 1 then a copy is not required */ 421 if ((roworiented) && (n == 1)) { 422 barray = (MatScalar*)v + i*bs2; 423 } else if((!roworiented) && (m == 1)) { 424 barray = (MatScalar*)v + j*bs2; 425 } else { /* Here a copy is required */ 426 if (roworiented) { 427 value = v + i*(stepval+bs)*bs + j*bs; 428 } else { 429 value = v + j*(stepval+bs)*bs + i*bs; 430 } 431 for (ii=0; ii<bs; ii++,value+=stepval) { 432 for (jj=0; jj<bs; jj++) { 433 *barray++ = *value++; 434 } 435 } 436 barray -=bs2; 437 } 438 439 if (in[j] >= cstart && in[j] < cend){ 440 col = in[j] - cstart; 441 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442 } 443 else if (in[j] < 0) continue; 444 #if defined(PETSC_USE_DEBUG) 445 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446 #endif 447 else { 448 if (mat->was_assembled) { 449 if (!baij->colmap) { 450 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451 } 452 453 #if defined(PETSC_USE_DEBUG) 454 #if defined (PETSC_USE_CTABLE) 455 { PetscInt data; 456 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 457 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458 } 459 #else 460 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 #endif 462 #endif 463 #if defined (PETSC_USE_CTABLE) 464 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465 col = (col - 1)/bs; 466 #else 467 col = (baij->colmap[in[j]] - 1)/bs; 468 #endif 469 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471 col = in[j]; 472 } 473 } 474 else col = in[j]; 475 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476 } 477 } 478 } else { 479 if (!baij->donotstash) { 480 if (roworiented) { 481 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482 
} else { 483 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 PetscFunctionReturn(0); 489 } 490 491 #define HASH_KEY 0.6180339887 492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 498 { 499 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500 PetscTruth roworiented = baij->roworiented; 501 PetscErrorCode ierr; 502 PetscInt i,j,row,col; 503 PetscInt rstart_orig=mat->rmap.rstart; 504 PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs; 505 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx; 506 PetscReal tmp; 507 MatScalar **HD = baij->hd,value; 508 #if defined(PETSC_USE_DEBUG) 509 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 510 #endif 511 512 PetscFunctionBegin; 513 514 for (i=0; i<m; i++) { 515 #if defined(PETSC_USE_DEBUG) 516 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 517 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 518 #endif 519 row = im[i]; 520 if (row >= rstart_orig && row < rend_orig) { 521 for (j=0; j<n; j++) { 522 col = in[j]; 523 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524 /* Look up PetscInto the Hash Table */ 525 key = (row/bs)*Nbs+(col/bs)+1; 526 h1 = HASH(size,key,tmp); 527 528 529 idx = h1; 530 #if defined(PETSC_USE_DEBUG) 531 insert_ct++; 532 total_ct++; 533 if (HT[idx] != key) { 534 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535 if (idx == size) { 536 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537 if (idx == h1) { 538 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539 } 540 } 541 } 542 #else 543 if (HT[idx] != key) { 544 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545 if (idx == size) { 546 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547 if (idx == h1) { 548 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549 } 550 } 551 } 552 #endif 553 /* A HASH table entry is found, so insert the values at the correct address */ 554 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 556 } 557 } else { 558 if (!baij->donotstash) { 559 if (roworiented) { 560 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561 } else { 562 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 #if defined(PETSC_USE_DEBUG) 568 baij->ht_total_ct = total_ct; 569 baij->ht_insert_ct = insert_ct; 570 #endif 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 577 { 578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579 PetscTruth roworiented = baij->roworiented; 580 PetscErrorCode ierr; 581 PetscInt i,j,ii,jj,row,col; 582 PetscInt rstart=baij->rstartbs; 583 
PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2; 584 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585 PetscReal tmp; 586 MatScalar **HD = baij->hd,*baij_a; 587 const MatScalar *v_t,*value; 588 #if defined(PETSC_USE_DEBUG) 589 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 590 #endif 591 592 PetscFunctionBegin; 593 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (i=0; i<m; i++) { 600 #if defined(PETSC_USE_DEBUG) 601 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 602 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 603 #endif 604 row = im[i]; 605 v_t = v + i*bs2; 606 if (row >= rstart && row < rend) { 607 for (j=0; j<n; j++) { 608 col = in[j]; 609 610 /* Look up into the Hash Table */ 611 key = row*Nbs+col+1; 612 h1 = HASH(size,key,tmp); 613 614 idx = h1; 615 #if defined(PETSC_USE_DEBUG) 616 total_ct++; 617 insert_ct++; 618 if (HT[idx] != key) { 619 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620 if (idx == size) { 621 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622 if (idx == h1) { 623 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624 } 625 } 626 } 627 #else 628 if (HT[idx] != key) { 629 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630 if (idx == size) { 631 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632 if (idx == h1) { 633 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634 } 635 } 636 } 637 #endif 638 baij_a = HD[idx]; 639 if (roworiented) { 640 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641 /* value = v + (i*(stepval+bs)+j)*bs; */ 642 value = v_t; 643 v_t += bs; 644 if (addv == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 baij_a[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 baij_a[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 value = v + j*(stepval+bs)*bs + i*bs; 659 if (addv == ADD_VALUES) { 660 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 661 for (jj=0; jj<bs; jj++) { 662 baij_a[jj] += *value++; 663 } 664 } 665 } else { 666 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667 for (jj=0; jj<bs; jj++) { 668 baij_a[jj] = *value++; 669 } 670 } 671 } 672 } 673 } 674 } else { 675 if (!baij->donotstash) { 676 if (roworiented) { 677 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 678 } else { 679 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 680 } 681 } 682 } 683 } 684 #if defined(PETSC_USE_DEBUG) 685 baij->ht_total_ct = total_ct; 686 baij->ht_insert_ct = insert_ct; 687 #endif 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatGetValues_MPIBAIJ" 693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694 { 695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 696 PetscErrorCode ierr; 697 PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend; 698 PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data; 699 700 PetscFunctionBegin; 701 for (i=0; i<m; i++) { 702 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 703 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",idxm[i],mat->rmap.N-1); 704 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705 row = idxm[i] - bsrstart; 706 for (j=0; j<n; j++) { 707 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 708 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 709 if (idxn[j] >= bscstart && idxn[j] < bscend){ 710 col = idxn[j] - bscstart; 711 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712 } else { 713 if (!baij->colmap) { 714 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715 } 716 #if defined (PETSC_USE_CTABLE) 717 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718 data --; 719 #else 720 data = baij->colmap[idxn[j]/bs]-1; 721 #endif 722 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723 else { 724 col = data + idxn[j]%bs; 725 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726 } 727 } 728 } 729 } else { 730 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731 } 732 } 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatNorm_MPIBAIJ" 738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739 { 740 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742 PetscErrorCode ierr; 743 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col; 744 PetscReal sum = 0.0; 745 MatScalar *v; 746 747 PetscFunctionBegin; 748 if (baij->size == 1) { 749 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750 } else { 751 if (type == NORM_FROBENIUS) { 752 v = amat->a; 753 nz = amat->nz*bs2; 754 for (i=0; i<nz; i++) { 755 #if defined(PETSC_USE_COMPLEX) 756 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757 #else 758 sum += (*v)*(*v); v++; 759 #endif 760 } 761 v = bmat->a; 762 nz = bmat->nz*bs2; 763 for (i=0; i<nz; i++) { 764 #if defined(PETSC_USE_COMPLEX) 765 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766 #else 767 sum += (*v)*(*v); v++; 768 #endif 769 } 770 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771 *nrm = sqrt(*nrm); 772 } else if (type == NORM_1) { /* max column sum */ 773 PetscReal *tmp,*tmp2; 774 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 775 ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 776 tmp2 = tmp + mat->cmap.N; 777 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 778 v = amat->a; jj = amat->j; 779 for (i=0; i<amat->nz; i++) { 780 for (j=0; j<bs; j++){ 781 col = bs*(cstart + *jj) + j; /* column index */ 782 for (row=0; row<bs; row++){ 783 tmp[col] += PetscAbsScalar(*v); v++; 784 } 785 } 786 jj++; 787 } 788 v = bmat->a; jj = bmat->j; 789 for (i=0; i<bmat->nz; i++) { 790 for (j=0; j<bs; j++){ 791 col = bs*garray[*jj] + j; 792 for (row=0; row<bs; row++){ 793 tmp[col] += PetscAbsScalar(*v); v++; 794 } 795 } 796 jj++; 797 } 798 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 799 *nrm = 0.0; 800 for (j=0; j<mat->cmap.N; j++) { 801 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 802 } 803 ierr = PetscFree(tmp);CHKERRQ(ierr); 804 } else if (type == NORM_INFINITY) { /* max row sum */ 805 PetscReal *sums; 806 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 807 sum = 0.0; 808 for (j=0; j<amat->mbs; j++) { 809 for (row=0; row<bs; row++) sums[row] = 0.0; 810 v = amat->a + bs2*amat->i[j]; 811 nz = 
amat->i[j+1]-amat->i[j]; 812 for (i=0; i<nz; i++) { 813 for (col=0; col<bs; col++){ 814 for (row=0; row<bs; row++){ 815 sums[row] += PetscAbsScalar(*v); v++; 816 } 817 } 818 } 819 v = bmat->a + bs2*bmat->i[j]; 820 nz = bmat->i[j+1]-bmat->i[j]; 821 for (i=0; i<nz; i++) { 822 for (col=0; col<bs; col++){ 823 for (row=0; row<bs; row++){ 824 sums[row] += PetscAbsScalar(*v); v++; 825 } 826 } 827 } 828 for (row=0; row<bs; row++){ 829 if (sums[row] > sum) sum = sums[row]; 830 } 831 } 832 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833 ierr = PetscFree(sums);CHKERRQ(ierr); 834 } else { 835 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836 } 837 } 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 PetscErrorCode ierr; 857 PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs; 858 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859 PetscInt *HT,key; 860 MatScalar **HD; 861 PetscReal tmp; 862 #if defined(PETSC_USE_INFO) 863 PetscInt ct=0,max=0; 864 #endif 865 866 PetscFunctionBegin; 867 baij->ht_size=(PetscInt)(factor*nz); 868 size = baij->ht_size; 869 870 if (baij->ht) { 871 PetscFunctionReturn(0); 872 } 873 874 /* Allocate Memory for Hash Table */ 875 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876 baij->ht = (PetscInt*)(baij->hd + size); 877 HD = baij->hd; 878 HT = baij->ht; 879 880 881 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 882 883 884 /* Loop Over A */ 885 for (i=0; i<a->mbs; i++) { 886 for (j=ai[i]; j<ai[i+1]; j++) { 887 row = i+rstart; 888 col = aj[j]+cstart; 889 890 key = row*Nbs + col + 1; 891 h1 = HASH(size,key,tmp); 892 for (k=0; k<size; k++){ 893 if (!HT[(h1+k)%size]) { 894 HT[(h1+k)%size] = key; 895 HD[(h1+k)%size] = a->a + j*bs2; 896 break; 897 #if defined(PETSC_USE_INFO) 898 } else { 899 ct++; 900 #endif 901 } 902 } 903 #if defined(PETSC_USE_INFO) 904 if (k> max) max = k; 905 #endif 906 } 907 } 908 /* Loop Over B */ 909 for (i=0; i<b->mbs; i++) { 910 for (j=bi[i]; j<bi[i+1]; j++) { 911 row = i+rstart; 912 col = garray[bj[j]]; 913 key = row*Nbs + col + 1; 914 h1 = HASH(size,key,tmp); 915 for (k=0; k<size; k++){ 916 if (!HT[(h1+k)%size]) { 917 HT[(h1+k)%size] = key; 918 HD[(h1+k)%size] = b->a + j*bs2; 919 break; 920 #if defined(PETSC_USE_INFO) 921 } else { 922 ct++; 923 #endif 924 } 925 } 926 #if defined(PETSC_USE_INFO) 927 if (k> max) max = k; 928 #endif 929 } 930 } 931 932 /* Print Summary */ 933 #if defined(PETSC_USE_INFO) 934 for (i=0,j=0; i<size; i++) { 935 if (HT[i]) {j++;} 936 } 937 ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 PetscErrorCode ierr; 948 PetscInt nstash,reallocs; 949 InsertMode addv; 950 951 PetscFunctionBegin; 952 if (baij->donotstash) { 953 PetscFunctionReturn(0); 954 } 955 956 /* make sure all processors are either in INSERTMODE or ADDMODE */ 957 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958 if (addv == (ADD_VALUES|INSERT_VALUES)) { 959 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960 } 961 mat->insertmode = addv; /* in case this processor had no cache */ 962 963 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 964 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr); 965 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 966 ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 967 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 968 ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975 { 976 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 978 PetscErrorCode ierr; 979 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980 PetscInt *row,*col,other_disassembled; 981 PetscTruth r1,r2,r3; 982 MatScalar *val; 983 InsertMode addv = mat->insertmode; 984 PetscMPIInt n; 985 986 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 987 PetscFunctionBegin; 988 if (!baij->donotstash) { 989 while (1) { 990 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 991 if (!flg) break; 992 993 for (i=0; i<n;) { 994 /* Now identify the consecutive vals belonging to the same row */ 995 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 996 if (j < n) ncols = j-i; 997 else ncols = n-i; 998 /* Now assemble all these values with a single function call */ 999 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1000 i = j; 1001 } 1002 } 1003 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1004 /* Now process the block-stash. 
Since the values are stashed column-oriented, temporarily clear the
       row-oriented flags, and restore the original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr =
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1089 if (iascii) { 1090 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1091 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1092 MatInfo info; 1093 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1094 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1095 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1096 rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1097 mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr); 1098 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1099 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1100 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1102 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1103 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1106 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1109 PetscFunctionReturn(0); 1110 } 1111 } 1112 1113 if (isdraw) { 1114 PetscDraw draw; 1115 PetscTruth isnull; 1116 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1117 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1118 } 1119 1120 if (size == 1) { 1121 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1122 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1123 } else { 1124 /* assemble the entire matrix onto first processor. */ 1125 Mat A; 1126 Mat_SeqBAIJ *Aloc; 1127 PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1128 MatScalar *a; 1129 1130 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1131 /* Perhaps this should be the type of mat? 
*/ 1132 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1133 if (!rank) { 1134 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1135 } else { 1136 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1137 } 1138 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1139 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1140 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1141 1142 /* copy over the A part */ 1143 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1144 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1145 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1146 1147 for (i=0; i<mbs; i++) { 1148 rvals[0] = bs*(baij->rstartbs + i); 1149 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1150 for (j=ai[i]; j<ai[i+1]; j++) { 1151 col = (baij->cstartbs+aj[j])*bs; 1152 for (k=0; k<bs; k++) { 1153 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1154 col++; a += bs; 1155 } 1156 } 1157 } 1158 /* copy over the B part */ 1159 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1160 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1161 for (i=0; i<mbs; i++) { 1162 rvals[0] = bs*(baij->rstartbs + i); 1163 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1164 for (j=ai[i]; j<ai[i+1]; j++) { 1165 col = baij->garray[aj[j]]*bs; 1166 for (k=0; k<bs; k++) { 1167 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1168 col++; a += bs; 1169 } 1170 } 1171 } 1172 ierr = PetscFree(rvals);CHKERRQ(ierr); 1173 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1174 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1175 /* 1176 Everyone has to call to draw the matrix since the graphics waits are 1177 synchronized across all processors that share the PetscDraw object 1178 */ 1179 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1180 if (!rank) { 1181 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1182 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1183 } 1184 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1185 ierr = MatDestroy(A);CHKERRQ(ierr); 1186 } 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatView_MPIBAIJ" 1192 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1193 { 1194 PetscErrorCode ierr; 1195 PetscTruth iascii,isdraw,issocket,isbinary; 1196 1197 PetscFunctionBegin; 1198 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1202 if (iascii || isdraw || issocket || isbinary) { 1203 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1204 } else { 1205 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1212 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1213 { 1214 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 #if defined(PETSC_USE_LOG) 1219 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N); 1220 #endif 1221 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1222 ierr = 
MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
PetscTruth merged; 1299 1300 PetscFunctionBegin; 1301 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1302 /* do nondiagonal part */ 1303 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1304 if (!merged) { 1305 /* send it on its way */ 1306 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1307 /* do local part */ 1308 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1309 /* receive remote parts: note this assumes the values are not actually */ 1310 /* inserted in yy until the next line */ 1311 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1312 } else { 1313 /* do local part */ 1314 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1315 /* send it on its way */ 1316 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1317 /* values actually were received in the Begin() but we need to call this nop */ 1318 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1325 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1326 { 1327 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1328 PetscErrorCode ierr; 1329 1330 PetscFunctionBegin; 1331 /* do nondiagonal part */ 1332 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1333 /* send it on its way */ 1334 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1335 /* do local part */ 1336 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1337 /* receive remote parts: note this assumes the values are not actually */ 1338 /* inserted in yy until the next line, which is true for my implementation*/ 1339 /* but is not perhaps always true. 
*/ 1340 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 /* 1345 This only works correctly for square matrices where the subblock A->A is the 1346 diagonal block 1347 */ 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1350 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1351 { 1352 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1353 PetscErrorCode ierr; 1354 1355 PetscFunctionBegin; 1356 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1357 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatScale_MPIBAIJ" 1363 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1364 { 1365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1366 PetscErrorCode ierr; 1367 1368 PetscFunctionBegin; 1369 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1370 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1376 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1377 { 1378 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1379 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1380 PetscErrorCode ierr; 1381 PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1382 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend; 1383 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1384 1385 PetscFunctionBegin; 1386 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1387 mat->getrowactive = PETSC_TRUE; 1388 1389 if (!mat->rowvalues && (idx || v)) { 1390 /* 1391 allocate enough space to hold information from the longest row. 
1392 */ 1393 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1394 PetscInt max = 1,mbs = mat->mbs,tmp; 1395 for (i=0; i<mbs; i++) { 1396 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1397 if (max < tmp) { max = tmp; } 1398 } 1399 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1400 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1401 } 1402 1403 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1404 lrow = row - brstart; 1405 1406 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1407 if (!v) {pvA = 0; pvB = 0;} 1408 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1409 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1410 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1411 nztot = nzA + nzB; 1412 1413 cmap = mat->garray; 1414 if (v || idx) { 1415 if (nztot) { 1416 /* Sort by increasing column numbers, assuming A and B already sorted */ 1417 PetscInt imark = -1; 1418 if (v) { 1419 *v = v_p = mat->rowvalues; 1420 for (i=0; i<nzB; i++) { 1421 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1422 else break; 1423 } 1424 imark = i; 1425 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1426 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1427 } 1428 if (idx) { 1429 *idx = idx_p = mat->rowindices; 1430 if (imark > -1) { 1431 for (i=0; i<imark; i++) { 1432 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1433 } 1434 } else { 1435 for (i=0; i<nzB; i++) { 1436 if (cmap[cworkB[i]/bs] < cstart) 1437 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1438 else break; 1439 } 1440 imark = i; 1441 } 1442 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1443 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1444 } 1445 } else { 1446 if (idx) *idx = 0; 1447 if (v) *v = 0; 1448 } 1449 } 1450 *nz = nztot; 1451 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1452 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1458 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1459 { 1460 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1461 1462 PetscFunctionBegin; 1463 if (!baij->getrowactive) { 1464 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1465 } 1466 baij->getrowactive = PETSC_FALSE; 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1472 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1473 { 1474 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1479 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1485 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1486 { 1487 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1488 Mat A = a->A,B = a->B; 1489 PetscErrorCode ierr; 1490 PetscReal isend[5],irecv[5]; 1491 1492 PetscFunctionBegin; 1493 info->block_size = (PetscReal)matin->rmap.bs; 1494 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1495 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1496 isend[3] = info->memory; isend[4] = info->mallocs; 1497 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1498 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1499 isend[3] += info->memory; isend[4] += info->mallocs; 1500 if (flag == MAT_LOCAL) { 1501 info->nz_used = isend[0]; 1502 info->nz_allocated = isend[1]; 1503 info->nz_unneeded = isend[2]; 1504 info->memory = isend[3]; 1505 info->mallocs = isend[4]; 1506 } else if (flag == MAT_GLOBAL_MAX) { 1507 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1508 info->nz_used = irecv[0]; 1509 info->nz_allocated = irecv[1]; 1510 info->nz_unneeded = irecv[2]; 1511 info->memory = irecv[3]; 1512 info->mallocs = irecv[4]; 1513 } else if (flag == MAT_GLOBAL_SUM) { 1514 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1515 info->nz_used = irecv[0]; 1516 info->nz_allocated = irecv[1]; 1517 info->nz_unneeded = irecv[2]; 1518 info->memory = irecv[3]; 1519 info->mallocs = irecv[4]; 1520 } else { 1521 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1522 } 1523 info->rows_global = (PetscReal)A->rmap.N; 1524 info->columns_global = (PetscReal)A->cmap.N; 1525 info->rows_local = (PetscReal)A->rmap.N; 1526 info->columns_local = (PetscReal)A->cmap.N; 1527 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1528 info->fill_ratio_needed = 0; 1529 info->factor_mallocs = 0; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1535 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1536 { 1537 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 switch (op) { 1542 case MAT_NO_NEW_NONZERO_LOCATIONS: 1543 case MAT_YES_NEW_NONZERO_LOCATIONS: 1544 case MAT_COLUMNS_UNSORTED: 1545 case MAT_COLUMNS_SORTED: 1546 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1547 case MAT_KEEP_ZEROED_ROWS: 1548 case MAT_NEW_NONZERO_LOCATION_ERR: 1549 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1550 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1551 break; 1552 case MAT_ROW_ORIENTED: 1553 a->roworiented = PETSC_TRUE; 1554 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1555 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1556 break; 1557 case MAT_ROWS_SORTED: 1558 case MAT_ROWS_UNSORTED: 1559 case MAT_YES_NEW_DIAGONALS: 1560 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1561 break; 1562 case MAT_COLUMN_ORIENTED: 1563 a->roworiented = PETSC_FALSE; 1564 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1565 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1566 break; 1567 case MAT_IGNORE_OFF_PROC_ENTRIES: 1568 a->donotstash = PETSC_TRUE; 1569 break; 1570 case MAT_NO_NEW_DIAGONALS: 1571 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1572 case MAT_USE_HASH_TABLE: 1573 a->ht_flag = PETSC_TRUE; 1574 break; 1575 case MAT_SYMMETRIC: 1576 case MAT_STRUCTURALLY_SYMMETRIC: 1577 case MAT_HERMITIAN: 1578 case MAT_SYMMETRY_ETERNAL: 1579 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1580 break; 1581 case MAT_NOT_SYMMETRIC: 1582 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1583 case MAT_NOT_HERMITIAN: 1584 case MAT_NOT_SYMMETRY_ETERNAL: 1585 break; 1586 default: 1587 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1594 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1595 { 1596 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1597 Mat_SeqBAIJ *Aloc; 1598 Mat B; 1599 PetscErrorCode ierr; 1600 PetscInt 
M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col; 1601 PetscInt bs=A->rmap.bs,mbs=baij->mbs; 1602 MatScalar *a; 1603 1604 PetscFunctionBegin; 1605 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1606 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1607 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1608 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1609 ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1610 1611 /* copy over the A part */ 1612 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1613 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1614 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1615 1616 for (i=0; i<mbs; i++) { 1617 rvals[0] = bs*(baij->rstartbs + i); 1618 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1619 for (j=ai[i]; j<ai[i+1]; j++) { 1620 col = (baij->cstartbs+aj[j])*bs; 1621 for (k=0; k<bs; k++) { 1622 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1623 col++; a += bs; 1624 } 1625 } 1626 } 1627 /* copy over the B part */ 1628 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1629 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1630 for (i=0; i<mbs; i++) { 1631 rvals[0] = bs*(baij->rstartbs + i); 1632 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1633 for (j=ai[i]; j<ai[i+1]; j++) { 1634 col = baij->garray[aj[j]]*bs; 1635 for (k=0; k<bs; k++) { 1636 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1637 col++; a += bs; 1638 } 1639 } 1640 } 1641 ierr = PetscFree(rvals);CHKERRQ(ierr); 1642 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 1645 if (matout) { 1646 *matout = B; 1647 } else { 1648 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1656 { 1657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1658 Mat a = baij->A,b = baij->B; 1659 PetscErrorCode ierr; 1660 PetscInt s1,s2,s3; 1661 1662 PetscFunctionBegin; 1663 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1664 if (rr) { 1665 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1666 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1667 /* Overlap communication with computation. 
*/ 1668 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1669 } 1670 if (ll) { 1671 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1672 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1673 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1674 } 1675 /* scale the diagonal block */ 1676 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1677 1678 if (rr) { 1679 /* Do a scatter end and then right scale the off-diagonal block */ 1680 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1681 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1682 } 1683 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1689 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1690 { 1691 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1692 PetscErrorCode ierr; 1693 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1694 PetscInt i,*owners = A->rmap.range; 1695 PetscInt *nprocs,j,idx,nsends,row; 1696 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1697 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1698 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart; 1699 MPI_Comm comm = A->comm; 1700 MPI_Request *send_waits,*recv_waits; 1701 MPI_Status recv_status,*send_status; 1702 #if defined(PETSC_DEBUG) 1703 PetscTruth found = PETSC_FALSE; 1704 #endif 1705 1706 PetscFunctionBegin; 1707 /* first count number of contributors to each processor */ 1708 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1709 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1710 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1711 j = 0; 1712 for (i=0; i<N; i++) { 1713 if (lastidx > (idx = rows[i])) j = 0; 1714 lastidx = idx; 1715 for (; j<size; j++) { 1716 if (idx >= owners[j] && idx < owners[j+1]) { 1717 nprocs[2*j]++; 1718 nprocs[2*j+1] = 1; 1719 owner[i] = j; 1720 #if defined(PETSC_DEBUG) 1721 found = PETSC_TRUE; 1722 #endif 1723 break; 1724 } 1725 } 1726 #if defined(PETSC_DEBUG) 1727 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1728 found = PETSC_FALSE; 1729 #endif 1730 } 1731 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1732 1733 /* inform other processors of number of messages and max length*/ 1734 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1735 1736 /* post receives: */ 1737 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1738 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1739 for (i=0; i<nrecvs; i++) { 1740 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1741 } 1742 1743 /* do sends: 1744 1) starts[i] gives the starting index in svalues for stuff going to 1745 the ith processor 1746 */ 1747 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1748 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1749 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1750 starts[0] = 0; 1751 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1752 for (i=0; i<N; i++) { 1753 svalues[starts[owner[i]]++] = rows[i]; 1754 } 1755 1756 starts[0] = 0; 1757 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1758 count = 0; 1759 for (i=0; i<size; i++) { 1760 if 
(nprocs[2*i+1]) { 1761 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1762 } 1763 } 1764 ierr = PetscFree(starts);CHKERRQ(ierr); 1765 1766 base = owners[rank]; 1767 1768 /* wait on receives */ 1769 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1770 source = lens + nrecvs; 1771 count = nrecvs; slen = 0; 1772 while (count) { 1773 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1774 /* unpack receives into our local space */ 1775 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1776 source[imdex] = recv_status.MPI_SOURCE; 1777 lens[imdex] = n; 1778 slen += n; 1779 count--; 1780 } 1781 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1782 1783 /* move the data into the send scatter */ 1784 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1785 count = 0; 1786 for (i=0; i<nrecvs; i++) { 1787 values = rvalues + i*nmax; 1788 for (j=0; j<lens[i]; j++) { 1789 lrows[count++] = values[j] - base; 1790 } 1791 } 1792 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1793 ierr = PetscFree(lens);CHKERRQ(ierr); 1794 ierr = PetscFree(owner);CHKERRQ(ierr); 1795 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1796 1797 /* actually zap the local rows */ 1798 /* 1799 Zero the required rows. If the "diagonal block" of the matrix 1800 is square and the user wishes to set the diagonal we use separate 1801 code so that MatSetValues() is not called for each diagonal allocating 1802 new memory, thus calling lots of mallocs and slowing things down. 1803 1804 Contributed by: Matthew Knepley 1805 */ 1806 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1807 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1808 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1809 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1810 } else if (diag != 0.0) { 1811 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1812 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1813 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1814 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1815 } 1816 for (i=0; i<slen; i++) { 1817 row = lrows[i] + rstart_bs; 1818 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1819 } 1820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 } else { 1823 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1824 } 1825 1826 ierr = PetscFree(lrows);CHKERRQ(ierr); 1827 1828 /* wait on sends */ 1829 if (nsends) { 1830 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1831 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1832 ierr = PetscFree(send_status);CHKERRQ(ierr); 1833 } 1834 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1835 ierr = PetscFree(svalues);CHKERRQ(ierr); 1836 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1842 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1843 { 1844 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1845 MPI_Comm comm = A->comm; 1846 static PetscTruth called = PETSC_FALSE; 1847 PetscErrorCode ierr; 1848 1849 PetscFunctionBegin; 1850 if (!a->rank) { 1851 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1852 } 1853 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1854 ierr = (*PetscHelpPrintf)(comm," 
Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1855 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1861 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1862 { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1864 PetscErrorCode ierr; 1865 1866 PetscFunctionBegin; 1867 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatEqual_MPIBAIJ" 1875 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1876 { 1877 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1878 Mat a,b,c,d; 1879 PetscTruth flg; 1880 PetscErrorCode ierr; 1881 1882 PetscFunctionBegin; 1883 a = matA->A; b = matA->B; 1884 c = matB->A; d = matB->B; 1885 1886 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1887 if (flg) { 1888 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1889 } 1890 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatCopy_MPIBAIJ" 1896 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1897 { 1898 PetscErrorCode ierr; 1899 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1900 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1901 1902 PetscFunctionBegin; 1903 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1904 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1905 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1906 } else { 1907 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1908 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1909 } 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1915 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1916 { 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #include "petscblaslapack.h" 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1927 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1928 { 1929 PetscErrorCode ierr; 1930 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1931 PetscBLASInt bnz,one=1; 1932 Mat_SeqBAIJ *x,*y; 1933 1934 PetscFunctionBegin; 1935 if (str == SAME_NONZERO_PATTERN) { 1936 PetscScalar alpha = a; 1937 x = (Mat_SeqBAIJ *)xx->A->data; 1938 y = (Mat_SeqBAIJ *)yy->A->data; 1939 bnz = (PetscBLASInt)x->nz; 1940 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1941 x = (Mat_SeqBAIJ *)xx->B->data; 1942 y = (Mat_SeqBAIJ *)yy->B->data; 1943 bnz = (PetscBLASInt)x->nz; 1944 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1945 } else { 1946 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1953 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1954 { 1955 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1960 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ 
"MatImaginaryPart_MPIBAIJ" 1966 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1967 { 1968 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1973 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 /* -------------------------------------------------------------------*/ 1978 static struct _MatOps MatOps_Values = { 1979 MatSetValues_MPIBAIJ, 1980 MatGetRow_MPIBAIJ, 1981 MatRestoreRow_MPIBAIJ, 1982 MatMult_MPIBAIJ, 1983 /* 4*/ MatMultAdd_MPIBAIJ, 1984 MatMultTranspose_MPIBAIJ, 1985 MatMultTransposeAdd_MPIBAIJ, 1986 0, 1987 0, 1988 0, 1989 /*10*/ 0, 1990 0, 1991 0, 1992 0, 1993 MatTranspose_MPIBAIJ, 1994 /*15*/ MatGetInfo_MPIBAIJ, 1995 MatEqual_MPIBAIJ, 1996 MatGetDiagonal_MPIBAIJ, 1997 MatDiagonalScale_MPIBAIJ, 1998 MatNorm_MPIBAIJ, 1999 /*20*/ MatAssemblyBegin_MPIBAIJ, 2000 MatAssemblyEnd_MPIBAIJ, 2001 0, 2002 MatSetOption_MPIBAIJ, 2003 MatZeroEntries_MPIBAIJ, 2004 /*25*/ MatZeroRows_MPIBAIJ, 2005 0, 2006 0, 2007 0, 2008 0, 2009 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2010 0, 2011 0, 2012 0, 2013 0, 2014 /*35*/ MatDuplicate_MPIBAIJ, 2015 0, 2016 0, 2017 0, 2018 0, 2019 /*40*/ MatAXPY_MPIBAIJ, 2020 MatGetSubMatrices_MPIBAIJ, 2021 MatIncreaseOverlap_MPIBAIJ, 2022 MatGetValues_MPIBAIJ, 2023 MatCopy_MPIBAIJ, 2024 /*45*/ MatPrintHelp_MPIBAIJ, 2025 MatScale_MPIBAIJ, 2026 0, 2027 0, 2028 0, 2029 /*50*/ 0, 2030 0, 2031 0, 2032 0, 2033 0, 2034 /*55*/ 0, 2035 0, 2036 MatSetUnfactored_MPIBAIJ, 2037 0, 2038 MatSetValuesBlocked_MPIBAIJ, 2039 /*60*/ 0, 2040 MatDestroy_MPIBAIJ, 2041 MatView_MPIBAIJ, 2042 0, 2043 0, 2044 /*65*/ 0, 2045 0, 2046 0, 2047 0, 2048 0, 2049 /*70*/ MatGetRowMax_MPIBAIJ, 2050 0, 2051 0, 2052 0, 2053 0, 2054 /*75*/ 0, 2055 0, 2056 0, 2057 0, 2058 0, 2059 /*80*/ 0, 2060 0, 2061 0, 2062 0, 2063 MatLoad_MPIBAIJ, 2064 /*85*/ 0, 2065 0, 2066 0, 2067 0, 2068 0, 2069 /*90*/ 0, 2070 0, 2071 0, 2072 0, 2073 0, 2074 /*95*/ 0, 2075 0, 2076 0, 2077 0, 2078 0, 2079 /*100*/0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /*105*/0, 2085 MatRealPart_MPIBAIJ, 2086 MatImaginaryPart_MPIBAIJ}; 2087 2088 2089 EXTERN_C_BEGIN 2090 #undef __FUNCT__ 2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2092 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2093 { 2094 PetscFunctionBegin; 2095 *a = ((Mat_MPIBAIJ *)A->data)->A; 2096 *iscopy = PETSC_FALSE; 2097 PetscFunctionReturn(0); 2098 } 2099 EXTERN_C_END 2100 2101 EXTERN_C_BEGIN 2102 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2103 EXTERN_C_END 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2108 { 2109 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 2110 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 2111 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 2112 const PetscInt *JJ; 2113 PetscScalar *values; 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 #if defined(PETSC_OPT_g) 2118 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2119 #endif 2120 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2121 o_nnz = d_nnz + m; 2122 2123 for (i=0; i<m; i++) { 2124 nnz = I[i+1]- I[i]; 2125 JJ = J + I[i]; 2126 nnz_max = PetscMax(nnz_max,nnz); 2127 #if 
defined(PETSC_OPT_g) 2128 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz); 2129 #endif 2130 for (j=0; j<nnz; j++) { 2131 if (*JJ >= cstart) break; 2132 JJ++; 2133 } 2134 d = 0; 2135 for (; j<nnz; j++) { 2136 if (*JJ++ >= cend) break; 2137 d++; 2138 } 2139 d_nnz[i] = d; 2140 o_nnz[i] = nnz - d; 2141 } 2142 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2143 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2144 2145 if (v) values = (PetscScalar*)v; 2146 else { 2147 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2148 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2149 } 2150 2151 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2152 for (i=0; i<m; i++) { 2153 ii = i + rstart; 2154 nnz = I[i+1]- I[i]; 2155 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? bs*bs*I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2160 2161 if (!v) { 2162 ierr = PetscFree(values);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2169 /*@C 2170 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format 2171 (block compressed row). 2172 2173 Collective on MPI_Comm 2174 2175 Input Parameters: 2176 + A - the matrix . bs - the block size 2177 . i - the indices into j for the start of each local row (starts with zero) 2178 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2179 - v - optional values in the matrix 2180 2181 Level: developer 2182 2183 .keywords: matrix, aij, compressed row, sparse, parallel 2184 2185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ 2186 @*/ 2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2188 { 2189 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2190 2191 PetscFunctionBegin; 2192 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2193 if (f) { 2194 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2203 { 2204 Mat_MPIBAIJ *b; 2205 PetscErrorCode ierr; 2206 PetscInt i; 2207 2208 PetscFunctionBegin; 2209 B->preallocated = PETSC_TRUE; 2210 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2211 2212 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2213 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2214 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2215 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2216 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2217 2218 B->rmap.bs = bs; 2219 B->cmap.bs = bs; 2220 ierr =
PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2221 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2222 2223 if (d_nnz) { 2224 for (i=0; i<B->rmap.n/bs; i++) { 2225 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2226 } 2227 } 2228 if (o_nnz) { 2229 for (i=0; i<B->rmap.n/bs; i++) { 2230 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2231 } 2232 } 2233 2234 b = (Mat_MPIBAIJ*)B->data; 2235 b->bs2 = bs*bs; 2236 b->mbs = B->rmap.n/bs; 2237 b->nbs = B->cmap.n/bs; 2238 b->Mbs = B->rmap.N/bs; 2239 b->Nbs = B->cmap.N/bs; 2240 2241 for (i=0; i<=b->size; i++) { 2242 b->rangebs[i] = B->rmap.range[i]/bs; 2243 } 2244 b->rstartbs = B->rmap.rstart/bs; 2245 b->rendbs = B->rmap.rend/bs; 2246 b->cstartbs = B->cmap.rstart/bs; 2247 b->cendbs = B->cmap.rend/bs; 2248 2249 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2250 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2251 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2252 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2253 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2254 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2255 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2256 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2257 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2258 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2259 2260 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2261 2262 PetscFunctionReturn(0); 2263 } 2264 EXTERN_C_END 2265 2266 EXTERN_C_BEGIN 2267 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2268 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2269 EXTERN_C_END 2270 2271 /*MC 2272 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2273 2274 Options Database Keys: 2275 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2276 2277 Level: beginner 2278 2279 .seealso: MatCreateMPIBAIJ() 2280 M*/ 2281 2282 EXTERN_C_BEGIN 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatCreate_MPIBAIJ" 2285 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2286 { 2287 Mat_MPIBAIJ *b; 2288 PetscErrorCode ierr; 2289 PetscTruth flg; 2290 2291 PetscFunctionBegin; 2292 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2293 B->data = (void*)b; 2294 2295 2296 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2297 B->mapping = 0; 2298 B->factor = 0; 2299 B->assembled = PETSC_FALSE; 2300 2301 B->insertmode = NOT_SET_VALUES; 2302 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2303 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2304 2305 /* build local table of row and column ownerships */ 2306 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2307 2308 /* build cache for off array entries formed */ 2309 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2310 b->donotstash = PETSC_FALSE; 2311 b->colmap = PETSC_NULL; 2312 b->garray = PETSC_NULL; 2313 b->roworiented = PETSC_TRUE; 2314 2315 #if defined(PETSC_USE_MAT_SINGLE) 2316 /* stuff for MatSetValues_XXX in single precision */ 2317 b->setvalueslen = 0; 2318 b->setvaluescopy = PETSC_NULL; 2319 #endif 2320 2321 /* stuff used in block assembly */ 2322 b->barray = 0; 2323 2324 /* stuff used for matrix vector multiply */ 2325 b->lvec = 0; 2326 b->Mvctx = 0; 2327 2328 /* stuff for MatGetRow() */ 2329 b->rowindices = 0; 2330 b->rowvalues = 0; 2331 b->getrowactive = PETSC_FALSE; 2332 2333 /* hash table stuff */ 2334 b->ht = 0; 2335 b->hd = 0; 2336 b->ht_size = 0; 2337 b->ht_flag = PETSC_FALSE; 2338 b->ht_fact = 0; 2339 b->ht_total_ct = 0; 2340 b->ht_insert_ct = 0; 2341 2342 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2343 if (flg) { 2344 PetscReal fact = 1.39; 2345 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2346 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2347 if (fact <= 1.0) fact = 1.39; 2348 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2349 ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr); 2350 } 2351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2352 "MatStoreValues_MPIBAIJ", 2353 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2355 "MatRetrieveValues_MPIBAIJ", 2356 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2358 "MatGetDiagonalBlock_MPIBAIJ", 2359 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2361 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2362 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2364 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2365 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2367 "MatDiagonalScaleLocal_MPIBAIJ", 2368 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2370 "MatSetHashTableFactor_MPIBAIJ", 2371
MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2372 PetscFunctionReturn(0); 2373 } 2374 EXTERN_C_END 2375 2376 /*MC 2377 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2378 2379 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2380 and MATMPIBAIJ otherwise. 2381 2382 Options Database Keys: 2383 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2384 2385 Level: beginner 2386 2387 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2388 M*/ 2389 2390 EXTERN_C_BEGIN 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatCreate_BAIJ" 2393 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2394 { 2395 PetscErrorCode ierr; 2396 PetscMPIInt size; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2400 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2401 if (size == 1) { 2402 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2403 } else { 2404 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2405 } 2406 PetscFunctionReturn(0); 2407 } 2408 EXTERN_C_END 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2412 /*@C 2413 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2414 (block compressed row). For good matrix assembly performance 2415 the user should preallocate the matrix storage by setting the parameters 2416 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2417 performance can be increased by more than a factor of 50. 2418 2419 Collective on Mat 2420 2421 Input Parameters: 2422 + A - the matrix 2423 . bs - size of the blocks 2424 . d_nz - number of block nonzeros per block row in diagonal portion of local 2425 submatrix (same for all local rows) 2426 . d_nnz - array containing the number of block nonzeros in the various block rows 2427 in the diagonal portion of the local submatrix (possibly different for each block 2428 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2429 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2430 submatrix (same for all local rows). 2431 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2432 off-diagonal portion of the local submatrix (possibly different for 2433 each block row) or PETSC_NULL. 2434 2435 If the *_nnz parameter is given then the *_nz parameter is ignored 2436 2437 Options Database Keys: 2438 . -mat_no_unroll - uses code that does not unroll the loops in the 2439 block calculations (much slower) 2440 . -mat_block_size - size of the blocks to use 2441 2442 Notes: 2443 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2444 then it must be used on all processors that share the object for that argument. 2445 2446 Storage Information: 2447 For a square global matrix we define each processor's diagonal portion 2448 to be its local rows and the corresponding columns (a square submatrix); 2449 each processor's off-diagonal portion encompasses the remainder of the 2450 local matrix (a rectangular submatrix). 2451 2452 The user can specify preallocated storage for the diagonal part of 2453 the local submatrix with either d_nz or d_nnz (not both). Set 2454 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2455 memory allocation.
Likewise, specify preallocated storage for the 2456 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2457 2458 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2459 the figure below we depict these three local rows and all columns (0-11). 2460 2461 .vb 2462 0 1 2 3 4 5 6 7 8 9 10 11 2463 ------------------- 2464 row 3 | o o o d d d o o o o o o 2465 row 4 | o o o d d d o o o o o o 2466 row 5 | o o o d d d o o o o o o 2467 ------------------- 2468 .ve 2469 2470 Thus, any entries in the d locations are stored in the d (diagonal) 2471 submatrix, and any entries in the o locations are stored in the 2472 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2473 stored simply in the MATSEQBAIJ format for compressed row storage. 2474 2475 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2476 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2477 In general, for PDE problems in which most nonzeros are near the diagonal, 2478 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2479 or you will get TERRIBLE performance; see the users' manual chapter on 2480 matrices. 2481 2482 Level: intermediate 2483 2484 .keywords: matrix, block, aij, compressed row, sparse, parallel 2485 2486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2487 @*/ 2488 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2489 { 2490 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2491 2492 PetscFunctionBegin; 2493 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2494 if (f) { 2495 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatCreateMPIBAIJ" 2502 /*@C 2503 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2504 (block compressed row). For good matrix assembly performance 2505 the user should preallocate the matrix storage by setting the parameters 2506 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2507 performance can be increased by more than a factor of 50. 2508 2509 Collective on MPI_Comm 2510 2511 Input Parameters: 2512 + comm - MPI communicator 2513 . bs - size of the blocks 2514 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2515 This value should be the same as the local size used in creating the 2516 y vector for the matrix-vector product y = Ax. 2517 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2518 This value should be the same as the local size used in creating the 2519 x vector for the matrix-vector product y = Ax. 2520 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 2521 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 2522 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2523 submatrix (same for all local rows) 2524 . d_nnz - array containing the number of nonzero blocks in the various block rows 2525 in the diagonal portion of the local submatrix (possibly different for each block 2526 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2527 .
o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2528 submatrix (same for all local rows). 2529 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2530 off-diagonal portion of the local submatrix (possibly different for 2531 each block row) or PETSC_NULL. 2532 2533 Output Parameter: 2534 . A - the matrix 2535 2536 Options Database Keys: 2537 . -mat_no_unroll - uses code that does not unroll the loops in the 2538 block calculations (much slower) 2539 . -mat_block_size - size of the blocks to use 2540 2541 Notes: 2542 If the *_nnz parameter is given then the *_nz parameter is ignored 2543 2544 A nonzero block is any block that has 1 or more nonzeros in it 2545 2546 The user MUST specify either the local or global matrix dimensions 2547 (possibly both). 2548 2549 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2550 then it must be used on all processors that share the object for that argument. 2551 2552 Storage Information: 2553 For a square global matrix we define each processor's diagonal portion 2554 to be its local rows and the corresponding columns (a square submatrix); 2555 each processor's off-diagonal portion encompasses the remainder of the 2556 local matrix (a rectangular submatrix). 2557 2558 The user can specify preallocated storage for the diagonal part of 2559 the local submatrix with either d_nz or d_nnz (not both). Set 2560 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2561 memory allocation. Likewise, specify preallocated storage for the 2562 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2563 2564 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2565 the figure below we depict these three local rows and all columns (0-11). 2566 2567 .vb 2568 0 1 2 3 4 5 6 7 8 9 10 11 2569 ------------------- 2570 row 3 | o o o d d d o o o o o o 2571 row 4 | o o o d d d o o o o o o 2572 row 5 | o o o d d d o o o o o o 2573 ------------------- 2574 .ve 2575 2576 Thus, any entries in the d locations are stored in the d (diagonal) 2577 submatrix, and any entries in the o locations are stored in the 2578 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2579 stored simply in the MATSEQBAIJ format for compressed row storage. 2580 2581 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2582 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2583 In general, for PDE problems in which most nonzeros are near the diagonal, 2584 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2585 or you will get TERRIBLE performance; see the users' manual chapter on 2586 matrices.
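   A minimal usage sketch follows (the block size, global dimensions, and preallocation
   counts are illustrative placeholders, not values taken from an existing example):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
   /* insert blocks with MatSetValuesBlocked(), then assemble */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve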
2587 2588 Level: intermediate 2589 2590 .keywords: matrix, block, aij, compressed row, sparse, parallel 2591 2592 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2593 @*/ 2594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2595 { 2596 PetscErrorCode ierr; 2597 PetscMPIInt size; 2598 2599 PetscFunctionBegin; 2600 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2601 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2603 if (size > 1) { 2604 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2605 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2606 } else { 2607 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2608 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2615 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2616 { 2617 Mat mat; 2618 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2619 PetscErrorCode ierr; 2620 PetscInt len=0; 2621 2622 PetscFunctionBegin; 2623 *newmat = 0; 2624 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2625 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2626 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2627 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2628 2629 mat->factor = matin->factor; 2630 mat->preallocated = PETSC_TRUE; 2631 mat->assembled = PETSC_TRUE; 2632 mat->insertmode = NOT_SET_VALUES; 2633 2634 a = (Mat_MPIBAIJ*)mat->data; 2635 mat->rmap.bs = matin->rmap.bs; 2636 a->bs2 = oldmat->bs2; 2637 a->mbs = oldmat->mbs; 2638 a->nbs = oldmat->nbs; 2639 a->Mbs = oldmat->Mbs; 2640 a->Nbs = oldmat->Nbs; 2641 2642 ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2643 ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2644 2645 a->size = oldmat->size; 2646 a->rank = oldmat->rank; 2647 a->donotstash = oldmat->donotstash; 2648 a->roworiented = oldmat->roworiented; 2649 a->rowindices = 0; 2650 a->rowvalues = 0; 2651 a->getrowactive = PETSC_FALSE; 2652 a->barray = 0; 2653 a->rstartbs = oldmat->rstartbs; 2654 a->rendbs = oldmat->rendbs; 2655 a->cstartbs = oldmat->cstartbs; 2656 a->cendbs = oldmat->cendbs; 2657 2658 /* hash table stuff */ 2659 a->ht = 0; 2660 a->hd = 0; 2661 a->ht_size = 0; 2662 a->ht_flag = oldmat->ht_flag; 2663 a->ht_fact = oldmat->ht_fact; 2664 a->ht_total_ct = 0; 2665 a->ht_insert_ct = 0; 2666 2667 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2668 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2669 ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2670 if (oldmat->colmap) { 2671 #if defined (PETSC_USE_CTABLE) 2672 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2673 #else 2674 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2675 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2676 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2677 #endif 2678 } else a->colmap = 0; 2679 2680 if (oldmat->garray && (len = 
((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2681 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2682 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2683 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2684 } else a->garray = 0; 2685 2686 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2687 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2688 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2689 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2690 2691 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2692 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2693 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2694 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2695 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2696 *newmat = mat; 2697 2698 PetscFunctionReturn(0); 2699 } 2700 2701 #include "petscsys.h" 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatLoad_MPIBAIJ" 2705 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2706 { 2707 Mat A; 2708 PetscErrorCode ierr; 2709 int fd; 2710 PetscInt i,nz,j,rstart,rend; 2711 PetscScalar *vals,*buf; 2712 MPI_Comm comm = ((PetscObject)viewer)->comm; 2713 MPI_Status status; 2714 PetscMPIInt rank,size,maxnz; 2715 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2716 PetscInt *locrowlens,*procsnz = 0,*browners; 2717 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2718 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2719 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2720 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2721 2722 PetscFunctionBegin; 2723 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2724 2725 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2726 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2727 if (!rank) { 2728 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2729 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2730 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2731 } 2732 2733 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2734 M = header[1]; N = header[2]; 2735 2736 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2737 2738 /* 2739 This code adds extra rows to make sure the number of rows is 2740 divisible by the blocksize 2741 */ 2742 Mbs = M/bs; 2743 extra_rows = bs - M + bs*Mbs; 2744 if (extra_rows == bs) extra_rows = 0; 2745 else Mbs++; 2746 if (extra_rows && !rank) { 2747 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2748 } 2749 2750 /* determine ownership of all rows */ 2751 mbs = Mbs/size + ((Mbs % size) > rank); 2752 m = mbs*bs; 2753 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2754 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2755 2756 /* process 0 needs enough room for process with most rows */ 2757 if (!rank) { 2758 mmax = rowners[1]; 2759 for (i=2; i<size; i++) { 2760 mmax = PetscMax(mmax,rowners[i]); 2761 } 2762 mmax*=bs; 2763 } else mmax = m; 2764 2765 rowners[0] = 0; 2766 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2767 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2768 rstart = rowners[rank]; 2769 rend = rowners[rank+1]; 2770 2771 /* distribute row lengths to all processors */ 2772 ierr = 
PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2773 if (!rank) { 2774 mend = m; 2775 if (size == 1) mend = mend - extra_rows; 2776 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2777 for (j=mend; j<m; j++) locrowlens[j] = 1; 2778 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2779 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2780 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2781 for (j=0; j<m; j++) { 2782 procsnz[0] += locrowlens[j]; 2783 } 2784 for (i=1; i<size; i++) { 2785 mend = browners[i+1] - browners[i]; 2786 if (i == size-1) mend = mend - extra_rows; 2787 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2788 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2789 /* calculate the number of nonzeros on each processor */ 2790 for (j=0; j<browners[i+1]-browners[i]; j++) { 2791 procsnz[i] += rowlengths[j]; 2792 } 2793 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2794 } 2795 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2796 } else { 2797 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2798 } 2799 2800 if (!rank) { 2801 /* determine max buffer needed and allocate it */ 2802 maxnz = procsnz[0]; 2803 for (i=1; i<size; i++) { 2804 maxnz = PetscMax(maxnz,procsnz[i]); 2805 } 2806 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2807 2808 /* read in my part of the matrix column indices */ 2809 nz = procsnz[0]; 2810 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2811 mycols = ibuf; 2812 if (size == 1) nz -= extra_rows; 2813 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2814 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2815 2816 /* read in every ones (except the last) and ship off */ 2817 for (i=1; i<size-1; i++) { 2818 nz = procsnz[i]; 2819 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2820 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2821 } 2822 /* read in the stuff for the last proc */ 2823 if (size != 1) { 2824 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2825 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2826 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2827 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2828 } 2829 ierr = PetscFree(cols);CHKERRQ(ierr); 2830 } else { 2831 /* determine buffer space needed for message */ 2832 nz = 0; 2833 for (i=0; i<m; i++) { 2834 nz += locrowlens[i]; 2835 } 2836 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2837 mycols = ibuf; 2838 /* receive message of column indices*/ 2839 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2840 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2841 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2842 } 2843 2844 /* loop over local rows, determining number of off diagonal entries */ 2845 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2846 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2847 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2848 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2849 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2850 rowcount = 0; nzcount = 0; 2851 for (i=0; i<mbs; i++) { 2852 dcount = 0; 2853 
odcount = 0; 2854 for (j=0; j<bs; j++) { 2855 kmax = locrowlens[rowcount]; 2856 for (k=0; k<kmax; k++) { 2857 tmp = mycols[nzcount++]/bs; 2858 if (!mask[tmp]) { 2859 mask[tmp] = 1; 2860 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2861 else masked1[dcount++] = tmp; 2862 } 2863 } 2864 rowcount++; 2865 } 2866 2867 dlens[i] = dcount; 2868 odlens[i] = odcount; 2869 2870 /* zero out the mask elements we set */ 2871 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2872 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2873 } 2874 2875 /* create our matrix */ 2876 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2877 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2878 ierr = MatSetType(A,type);CHKERRQ(ierr); 2879 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2880 2881 /* Why isn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */ 2882 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2883 2884 if (!rank) { 2885 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2886 /* read in my part of the matrix numerical values */ 2887 nz = procsnz[0]; 2888 vals = buf; 2889 mycols = ibuf; 2890 if (size == 1) nz -= extra_rows; 2891 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2892 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2893 2894 /* insert into matrix */ 2895 jj = rstart*bs; 2896 for (i=0; i<m; i++) { 2897 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2898 mycols += locrowlens[i]; 2899 vals += locrowlens[i]; 2900 jj++; 2901 } 2902 /* read in other processors (except the last one) and ship out */ 2903 for (i=1; i<size-1; i++) { 2904 nz = procsnz[i]; 2905 vals = buf; 2906 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2907 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2908 } 2909 /* the last proc */ 2910 if (size != 1){ 2911 nz = procsnz[i] - extra_rows; 2912 vals = buf; 2913 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2914 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2915 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2916 } 2917 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2918 } else { 2919 /* receive numeric values */ 2920 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2921 2922 /* receive message of values */ 2923 vals = buf; 2924 mycols = ibuf; 2925 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2926 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2927 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2928 2929 /* insert into matrix */ 2930 jj = rstart*bs; 2931 for (i=0; i<m; i++) { 2932 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2933 mycols += locrowlens[i]; 2934 vals += locrowlens[i]; 2935 jj++; 2936 } 2937 } 2938 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2939 ierr = PetscFree(buf);CHKERRQ(ierr); 2940 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2941 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2942 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2943 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2944 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 2947 *newmat = A; 2948 PetscFunctionReturn(0); 2949 } 2950 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2953 /*@ 2954
MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2955 2956 Input Parameters: 2957 . mat - the matrix 2958 . fact - factor 2959 2960 Collective on Mat 2961 2962 Level: advanced 2963 2964 Notes: 2965 This can also be set by the command line option: -mat_use_hash_table fact 2966 2967 .keywords: matrix, hashtable, factor, HT 2968 2969 .seealso: MatSetOption() 2970 @*/ 2971 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2972 { 2973 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2974 2975 PetscFunctionBegin; 2976 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2977 if (f) { 2978 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2979 } 2980 PetscFunctionReturn(0); 2981 } 2982 2983 EXTERN_C_BEGIN 2984 #undef __FUNCT__ 2985 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2986 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2987 { 2988 Mat_MPIBAIJ *baij; 2989 2990 PetscFunctionBegin; 2991 baij = (Mat_MPIBAIJ*)mat->data; 2992 baij->ht_fact = fact; 2993 PetscFunctionReturn(0); 2994 } 2995 EXTERN_C_END 2996 2997 #undef __FUNCT__ 2998 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2999 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3000 { 3001 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3002 PetscFunctionBegin; 3003 *Ad = a->A; 3004 *Ao = a->B; 3005 *colmap = a->garray; 3006 PetscFunctionReturn(0); 3007 } 3008
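/*
   A minimal usage sketch for MatMPIBAIJGetSeqBAIJ() (illustrative only; it assumes A is an
   already assembled MATMPIBAIJ matrix created elsewhere):

     Mat      Ad,Ao;
     PetscInt *colmap;

     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Ad is the local diagonal block and Ao the local off-diagonal block, both stored as
   MATSEQBAIJ matrices owned by A (the caller must not destroy them). colmap[c] gives the
   global block column corresponding to local block column c of Ao, i.e. the same garray
   mapping used by MatGetRow_MPIBAIJ() above.
*/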