#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements already in single (MatScalar)
   precision and inserts them into the single precision data structure. The function
   MatSetValuesBlocked_MPIBAIJ() converts the PetscScalar entries to single precision and
   then calls ..._MatScalar() to put them into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
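/*
   Illustrative sketch: with PETSC_USE_MAT_SINGLE the public entry points copy
   the caller's PetscScalar values into a MatScalar (single precision) scratch
   buffer before dispatching, roughly

      for (i=0; i<N; i++) vsingle[i] = v[i];            (PetscScalar -> MatScalar)
      MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);

   as done by the wrappers further below. Without PETSC_USE_MAT_SINGLE the two
   types coincide and the #defines above alias the ..._MatScalar names
   directly, so no copy is made.
*/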
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
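/*
   Example (illustrative): to translate a global point column gcol into the
   local off-diagonal storage, callers of this routine do (non-CTABLE case)

      lcol = baij->colmap[gcol/bs] - 1;     local start of the block, or -1
      if (lcol >= 0) lcol += gcol % bs;     point offset within the block

   The stored values are shifted by +1 so that a zeroed colmap entry (0) means
   "this global block column is not present on this process".
*/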
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
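/*
   Both macros above implement the same insertion scheme, one for the diagonal
   block A and one for the off-diagonal block B: compute the block row row/bs
   and block column col/bs, narrow the search window with a bisection on the
   sorted block column indices rp[], then scan linearly. On a hit the single
   entry at offset bs*cidx + ridx of the bs2-sized block (blocks are stored
   column major) is set or added to; on a miss the tail of the block row is
   shifted up by one block and a zeroed block is inserted for the new block
   column.
*/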
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
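/*
   Example (illustrative): with row-oriented input, m = 1, n = 2, bs = 2, the
   array v holds two 2x2 blocks interleaved row by row,

      v = a00 a01 b00 b01 a10 a11 b10 b11

   so stepval = (n-1)*bs = 2 is the gap skipped after each block row, and the
   copy loop above gathers block a contiguously into barray as a00 a01 a10 a11
   before it is handed to the SeqBAIJ kernel.
*/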
"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 579 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscTruth roworiented = baij->roworiented; 583 PetscErrorCode ierr; 584 PetscInt i,j,ii,jj,row,col; 585 PetscInt rstart=baij->rstartbs; 586 PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2; 587 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 588 PetscReal tmp; 589 MatScalar **HD = baij->hd,*baij_a; 590 const MatScalar *v_t,*value; 591 #if defined(PETSC_USE_DEBUG) 592 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 593 #endif 594 595 PetscFunctionBegin; 596 597 if (roworiented) { 598 stepval = (n-1)*bs; 599 } else { 600 stepval = (m-1)*bs; 601 } 602 for (i=0; i<m; i++) { 603 #if defined(PETSC_USE_DEBUG) 604 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 605 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 606 #endif 607 row = im[i]; 608 v_t = v + i*nbs2; 609 if (row >= rstart && row < rend) { 610 for (j=0; j<n; j++) { 611 col = in[j]; 612 613 /* Look up into the Hash Table */ 614 key = row*Nbs+col+1; 615 h1 = HASH(size,key,tmp); 616 617 idx = h1; 618 #if defined(PETSC_USE_DEBUG) 619 total_ct++; 620 insert_ct++; 621 if (HT[idx] != key) { 622 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 623 if (idx == size) { 624 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 625 if (idx == h1) { 626 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 627 } 628 } 629 } 630 #else 631 if (HT[idx] != key) { 632 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 633 if (idx == size) { 634 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 635 if (idx == h1) { 636 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 637 } 638 } 639 } 640 #endif 641 baij_a = HD[idx]; 642 if (roworiented) { 643 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 644 /* value = v + (i*(stepval+bs)+j)*bs; */ 645 value = v_t; 646 v_t += bs; 647 if (addv == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 baij_a[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 baij_a[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 value = v + j*(stepval+bs)*bs + i*bs; 662 if (addv == ADD_VALUES) { 663 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 664 for (jj=0; jj<bs; jj++) { 665 baij_a[jj] += *value++; 666 } 667 } 668 } else { 669 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 670 for (jj=0; jj<bs; jj++) { 671 baij_a[jj] = *value++; 672 } 673 } 674 } 675 } 676 } 677 } else { 678 if (!baij->donotstash) { 679 if (roworiented) { 680 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 681 } else { 682 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 #if defined(PETSC_USE_DEBUG) 688 baij->ht_total_ct = total_ct; 689 baij->ht_insert_ct = insert_ct; 690 #endif 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetValues_MPIBAIJ" 696 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 697 { 698 Mat_MPIBAIJ *baij = 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
/*
   Creates and populates the hash table used by the MatSetValues_..._HT routines.
   This table is created only once. If new entries need to be added to the matrix
   then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
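/*
   Example (illustrative): keys are row*Nbs + col + 1; the +1 keeps 0 free to
   mark empty slots. On a collision the loops above probe linearly with
   wraparound, HT[(h1+k)%size] for k = 0,1,..., and the MatSetValues_..._HT
   routines search in the same order, so a probe that wraps all the way back
   to h1 without a match means the entry was never inserted.
*/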
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE; the bitwise
     OR of the modes equals ADD_VALUES|INSERT_VALUES only if they are mixed */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags, and after MatSetValues() restore the
       original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
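/*
   Usage sketch for the scaling above (l and r are placeholders that must
   conform to the local row and column layout of A):

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);    computes A = diag(l) A diag(r)

   Passing PETSC_NULL for l or r skips that side of the scaling; the scatter
   of r into baij->lvec is what lets the off-diagonal block be scaled without
   storing a full-length vector on every process.
*/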
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
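/*
   Usage sketch (rows[] holds global row indices; they need not be owned by
   the calling process, since the communication above routes each index to
   its owner):

     PetscInt    rows[3] = {0,7,12};
     PetscScalar one     = 1.0;
     ierr = MatZeroRows(A,3,rows,one);CHKERRQ(ierr);

   This zeros the three rows and puts 1.0 on their diagonal entries, the
   usual way of imposing Dirichlet boundary conditions without changing the
   nonzero structure of the matrix.
*/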
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
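/*
   Usage sketch: because of the MPI_Allreduce above, MatEqual() is collective
   and every process receives the global answer:

     PetscTruth eq;
     ierr = MatEqual(X,Y,&eq);CHKERRQ(ierr);

   X and Y are placeholders for two assembled matrices with identical
   parallel layouts; eq is PETSC_TRUE only if both the diagonal and the
   off-diagonal blocks match on every process.
*/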
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIBAIJ"
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIBAIJ"
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    PetscInt    bs2   = yy->bs2;
    x   = (Mat_SeqBAIJ *)xx->A->data;
    y   = (Mat_SeqBAIJ *)yy->A->data;
    /* nz counts block entries; the BLAS call needs the scalar length bs2*nz */
    bnz = (PetscBLASInt)(bs2*x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x   = (Mat_SeqBAIJ *)xx->B->data;
    y   = (Mat_SeqBAIJ *)yy->B->data;
    bnz = (PetscBLASInt)(bs2*x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIBAIJ"
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
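/*
   Usage sketch: the BLAS fast path above is selected by the structure flag,

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   which computes Y = Y + 2*X directly on the value arrays; with
   DIFFERENT_NONZERO_PATTERN the generic MatAXPY_Basic() path is used
   instead, which is slower but makes no structural assumptions.
*/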
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*45*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0,
       0,
/*100*/0,
       0,
       0,
       0,
       0,
/*105*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
  PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_OPT_g)
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 but it is %D",Ii[0]);
#endif
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
#if defined(PETSC_OPT_g)
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
#endif
    /* count the block columns that fall inside the diagonal block [cstart,cend) */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii  = i + rstart;
    nnz = Ii[i+1] - Ii[i];
    /* each block carries bs*bs scalar values, so advance the value pointer by bs*bs per block */
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
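/*
   Example of the expected input (a sketch for bs=2 on one process: two
   block rows, with blocks (0,0), (0,2) and (1,1) nonzero):

     PetscInt    Ii[] = {0,2,3};     block-row pointers
     PetscInt    J[]  = {0,2,1};     block-column indices, sorted per row
     PetscScalar v[3*4];             bs*bs = 4 scalars per block, in J order
     ierr = MatMPIBAIJSetPreallocationCSR(B,2,Ii,J,v);CHKERRQ(ierr);

   Note that Ii and J are in terms of blocks, not scalar rows and columns.
*/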
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = bs;
  B->cmap.bs = bs;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap.n/bs;
  b->nbs = B->cmap.n/bs;
  b->Mbs = B->rmap.N/bs;
  b->Nbs = B->cmap.N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap.range[i]/bs;
  }
  b->rstartbs = B->rmap.rstart/bs;
  b->rendbs   = B->rmap.rend/bs;
  b->cstartbs = B->cmap.rstart/bs;
  b->cendbs   = B->cmap.rend/bs;

  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
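/*
   Note on the PetscOptionsInt() call above: the block size chosen in code
   can be overridden at run time before any storage is allocated, e.g.

     ./prog -mat_block_size 4

   where "prog" is a placeholder for an application using this matrix type.
*/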
EXTERN_C_BEGIN
EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping   = 0;
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen  = 0;
  b->setvaluescopy = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
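/*
   Usage sketch, the standard creation sequence for this type (comm, M, N
   and bs are placeholders):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   MatCreate_MPIBAIJ() above only installs the operations table and empty
   data structures; the local A and B blocks are not built until the
   preallocation call.
*/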
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2313 if (flg) { 2314 PetscReal fact = 1.39; 2315 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2316 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2317 if (fact <= 1.0) fact = 1.39; 2318 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2319 ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2320 } 2321 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2322 2323 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2324 "MatStoreValues_MPIBAIJ", 2325 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2326 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2327 "MatRetrieveValues_MPIBAIJ", 2328 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2330 "MatGetDiagonalBlock_MPIBAIJ", 2331 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2333 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2334 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2336 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2337 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2338 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2339 "MatDiagonalScaleLocal_MPIBAIJ", 2340 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2342 "MatSetHashTableFactor_MPIBAIJ", 2343 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2344 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 EXTERN_C_END 2348 2349 /*MC 2350 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2351 2352 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2353 and MATMPIBAIJ otherwise. 2354 2355 Options Database Keys: 2356 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2357 2358 Level: beginner 2359 2360 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2361 M*/ 2362 2363 EXTERN_C_BEGIN 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatCreate_BAIJ" 2366 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2367 { 2368 PetscErrorCode ierr; 2369 PetscMPIInt size; 2370 2371 PetscFunctionBegin; 2372 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2373 if (size == 1) { 2374 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2375 } else { 2376 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 EXTERN_C_END 2381 2382 #undef __FUNCT__ 2383 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2384 /*@C 2385 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2386 (block compressed row). For good matrix assembly performance 2387 the user should preallocate the matrix storage by setting the parameters 2388 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2389 performance can be increased by more than a factor of 50. 2390 2391 Collective on Mat 2392 2393 Input Parameters: 2394 + A - the matrix 2395 . bs - size of blockk 2396 . 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
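/*
   Worked example for the figure above (a sketch with bs=1): the process
   owning rows 3, 4 and 5 could preallocate exactly with

     PetscInt d_nnz[3] = {3,3,3};    columns 3-5 fall in the diagonal block
     PetscInt o_nnz[3] = {9,9,9};    the remaining nine columns per row
     ierr = MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   or, since every row is alike here, simply with d_nz=3, o_nz=9 and
   PETSC_NULL for both arrays.
*/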
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
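/*
   Usage sketch (M and N are placeholder global sizes, divisible by the block
   size 2); one call replaces the MatCreate/MatSetType/preallocation sequence:

     Mat A;
     ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
                             M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/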
   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a            = (Mat_MPIBAIJ*)mat->data;
  mat->rmap.bs = matin->rmap.bs;
  a->bs2       = oldmat->bs2;
  a->mbs       = oldmat->mbs;
  a->nbs       = oldmat->nbs;
  a->Mbs       = oldmat->Mbs;
  a->Nbs       = oldmat->Nbs;

  ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
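/*
   Usage sketch:

     Mat Acopy;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);

   With MAT_DO_NOT_COPY_VALUES the duplicate gets the same parallel layout,
   block size and nonzero pattern but zeroed numerical values.
*/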
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++)  browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];
matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2695 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2696 2697 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2698 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2699 if (!rank) { 2700 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2701 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2702 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2703 } 2704 2705 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2706 M = header[1]; N = header[2]; 2707 2708 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2709 2710 /* 2711 This code adds extra rows to make sure the number of rows is 2712 divisible by the blocksize 2713 */ 2714 Mbs = M/bs; 2715 extra_rows = bs - M + bs*Mbs; 2716 if (extra_rows == bs) extra_rows = 0; 2717 else Mbs++; 2718 if (extra_rows && !rank) { 2719 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2720 } 2721 2722 /* determine ownership of all rows */ 2723 mbs = Mbs/size + ((Mbs % size) > rank); 2724 m = mbs*bs; 2725 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2726 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2727 2728 /* process 0 needs enough room for process with most rows */ 2729 if (!rank) { 2730 mmax = rowners[1]; 2731 for (i=2; i<size; i++) { 2732 mmax = PetscMax(mmax,rowners[i]); 2733 } 2734 mmax*=bs; 2735 } else mmax = m; 2736 2737 rowners[0] = 0; 2738 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2739 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2740 rstart = rowners[rank]; 2741 rend = rowners[rank+1]; 2742 2743 /* distribute row lengths to all processors */ 2744 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2745 if (!rank) { 2746 mend = m; 2747 if (size == 1) mend = mend - extra_rows; 2748 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2749 for (j=mend; j<m; j++) locrowlens[j] = 1; 2750 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2751 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2752 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2753 for (j=0; j<m; j++) { 2754 procsnz[0] += locrowlens[j]; 2755 } 2756 for (i=1; i<size; i++) { 2757 mend = browners[i+1] - browners[i]; 2758 if (i == size-1) mend = mend - extra_rows; 2759 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2760 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2761 /* calculate the number of nonzeros on each processor */ 2762 for (j=0; j<browners[i+1]-browners[i]; j++) { 2763 procsnz[i] += rowlengths[j]; 2764 } 2765 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2766 } 2767 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2768 } else { 2769 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2770 } 2771 2772 if (!rank) { 2773 /* determine max buffer needed and allocate it */ 2774 maxnz = procsnz[0]; 2775 for (i=1; i<size; i++) { 2776 maxnz = PetscMax(maxnz,procsnz[i]); 2777 } 2778 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2779 2780 /* read in my part of the matrix column indices */ 2781 nz = procsnz[0]; 2782 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2783 mycols = ibuf; 2784 if (size == 1) nz -= extra_rows; 2785 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2786 if (size == 1) for 
  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++)  mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the values of the other processors (except the last one) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table expansion factor (must be greater than 1.0)

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
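/*
   Usage sketch for loading and inspecting a matrix of this type (assumes
   "matrix.dat" was written with MatView() on a binary viewer):

     PetscViewer viewer;
     Mat         A,Ad,Ao;
     PetscInt    *garray;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad and Ao are the local diagonal and off-diagonal SeqBAIJ blocks, and
   garray[] maps the local block columns of Ao back to global block columns.
*/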