1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 /* UGLY, ugly, ugly 18 When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 19 not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 20 inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 21 converts the entries into single precision and then calls ..._MatScalar() to put them 22 into the single precision data structures. 23 */ 24 #if defined(PETSC_USE_MAT_SINGLE) 25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode); 30 #else 31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 32 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ 33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ 34 #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT 35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT 36 #endif 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ" 40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v) 41 { 42 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 PetscScalar *va,*vb; 46 Vec vtmp; 47 48 PetscFunctionBegin; 49 50 ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr); 51 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 52 53 ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr); 54 ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr); 55 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 56 57 for (i=0; i<A->m; i++){ 58 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i]; 59 } 60 61 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 62 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 63 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 64 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 72 { 73 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 78 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 EXTERN_C_END 82 83 EXTERN_C_BEGIN 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 87 { 88 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 93 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 EXTERN_C_END 97 98 /* 99 Local utility routine that creates a mapping from the global column 100 number to the local number in the off-diagonal part of the local 101 storage of the matrix. This is done in a non scable way since the 102 length of colmap equals the global matrix length. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 107 { 108 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 109 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 110 PetscErrorCode ierr; 111 PetscInt nbs = B->nbs,i,bs=mat->bs; 112 113 PetscFunctionBegin; 114 #if defined (PETSC_USE_CTABLE) 115 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 116 for (i=0; i<nbs; i++){ 117 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 118 } 119 #else 120 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 121 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 123 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 124 #endif 125 PetscFunctionReturn(0); 126 } 127 128 #define CHUNKSIZE 10 129 130 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 131 { \ 132 \ 133 brow = row/bs; \ 134 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 135 rmax = aimax[brow]; nrow = ailen[brow]; \ 136 bcol = col/bs; \ 137 ridx = row % bs; cidx = col % bs; \ 138 low = 0; high = nrow; \ 139 while (high-low > 3) { \ 140 t = (low+high)/2; \ 141 if (rp[t] > bcol) high = t; \ 142 else low = t; \ 143 } \ 144 for (_i=low; _i<high; _i++) { \ 145 if (rp[_i] > bcol) break; \ 146 if (rp[_i] == bcol) { \ 147 bap = ap + bs2*_i + bs*cidx + ridx; \ 148 if (addv == ADD_VALUES) *bap += value; \ 149 else *bap = value; \ 150 goto a_noinsert; \ 151 } \ 152 } \ 153 if (a->nonew == 1) goto a_noinsert; \ 154 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 155 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \ 156 N = nrow++ - 1; \ 157 /* shift up all the later entries in this row */ \ 158 for (ii=N; ii>=_i; ii--) { \ 159 rp[ii+1] = rp[ii]; \ 160 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 161 } \ 162 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 163 rp[_i] = bcol; \ 164 ap[bs2*_i + bs*cidx + ridx] = value; \ 165 a_noinsert:; \ 166 ailen[brow] = nrow; \ 167 } 168 169 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 170 { \ 171 brow = row/bs; \ 172 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 173 rmax = bimax[brow]; nrow = bilen[brow]; \ 174 bcol = col/bs; \ 175 ridx = row % bs; cidx = col % bs; \ 176 low = 0; high = nrow; \ 177 while (high-low > 3) { \ 178 t = (low+high)/2; \ 179 if (rp[t] > bcol) high = t; \ 180 else low = t; \ 181 } \ 182 for (_i=low; _i<high; _i++) { \ 183 if (rp[_i] > bcol) break; \ 184 if (rp[_i] == bcol) { \ 185 bap = ap + bs2*_i + bs*cidx + ridx; \ 186 if (addv == ADD_VALUES) *bap += value; \ 187 else *bap = value; \ 188 goto b_noinsert; \ 189 } \ 190 } \ 191 if (b->nonew == 1) goto b_noinsert; \ 192 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 193 MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \ 194 CHKMEMQ;\ 195 N = nrow++ - 1; \ 196 /* shift up all the later entries in this row */ \ 197 for (ii=N; ii>=_i; ii--) { \ 198 rp[ii+1] = rp[ii]; \ 199 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 200 } \ 201 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 202 rp[_i] = bcol; \ 203 ap[bs2*_i + bs*cidx + ridx] = value; \ 204 b_noinsert:; \ 205 bilen[brow] = nrow; \ 206 } 207 208 #if defined(PETSC_USE_MAT_SINGLE) 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatSetValues_MPIBAIJ" 211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 212 { 213 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 214 PetscErrorCode ierr; 215 PetscInt i,N = m*n; 216 MatScalar *vsingle; 217 218 PetscFunctionBegin; 219 if (N > b->setvalueslen) { 220 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 221 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 222 b->setvalueslen = N; 223 } 224 vsingle = b->setvaluescopy; 225 226 for (i=0; i<N; i++) { 227 vsingle[i] = v[i]; 228 } 229 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 236 { 237 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 238 PetscErrorCode ierr; 239 PetscInt i,N = m*n*b->bs2; 240 MatScalar *vsingle; 241 242 PetscFunctionBegin; 243 if (N > b->setvalueslen) { 244 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 245 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 246 b->setvalueslen = N; 247 } 248 vsingle = b->setvaluescopy; 249 for (i=0; i<N; i++) { 250 vsingle[i] = v[i]; 251 } 252 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 259 { 260 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 261 PetscErrorCode ierr; 262 PetscInt i,N = m*n; 263 MatScalar *vsingle; 264 265 PetscFunctionBegin; 266 if (N > b->setvalueslen) { 267 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 269 b->setvalueslen = N; 270 } 271 vsingle = b->setvaluescopy; 272 for (i=0; i<N; i++) { 273 vsingle[i] = v[i]; 274 } 275 ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 282 { 283 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 284 PetscErrorCode ierr; 285 PetscInt i,N = m*n*b->bs2; 286 MatScalar *vsingle; 287 288 PetscFunctionBegin; 289 if (N > b->setvalueslen) { 290 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 292 b->setvalueslen = N; 293 } 294 vsingle = b->setvaluescopy; 295 for (i=0; i<N; i++) { 296 vsingle[i] = v[i]; 297 } 298 ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 #endif 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar" 305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 306 { 307 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 308 MatScalar value; 309 PetscTruth roworiented = baij->roworiented; 310 PetscErrorCode ierr; 311 PetscInt i,j,row,col; 312 PetscInt rstart_orig=baij->rstart_bs; 313 PetscInt rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 314 PetscInt cend_orig=baij->cend_bs,bs=mat->bs; 315 316 /* Some Variables required in the macro */ 317 Mat A = baij->A; 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 319 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 320 MatScalar *aa=a->a; 321 322 Mat B = baij->B; 323 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 324 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 325 MatScalar *ba=b->a; 326 327 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 328 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 329 MatScalar *ap,*bap; 330 331 PetscFunctionBegin; 332 for (i=0; i<m; i++) { 333 if (im[i] < 0) continue; 334 #if defined(PETSC_USE_DEBUG) 335 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 336 #endif 337 if (im[i] >= rstart_orig && im[i] < rend_orig) { 338 row = im[i] - rstart_orig; 339 for (j=0; j<n; j++) { 340 if (in[j] >= cstart_orig && in[j] < cend_orig){ 341 col = in[j] - cstart_orig; 342 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 343 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 344 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 345 } else if (in[j] < 0) continue; 346 #if defined(PETSC_USE_DEBUG) 347 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);} 348 #endif 349 else { 350 if (mat->was_assembled) { 351 if (!baij->colmap) { 352 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 353 } 354 #if defined (PETSC_USE_CTABLE) 355 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 356 col = col - 1; 357 #else 358 col = baij->colmap[in[j]/bs] - 1; 359 #endif 360 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 361 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 362 col = in[j]; 363 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 364 B = baij->B; 365 b = (Mat_SeqBAIJ*)(B)->data; 366 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 367 ba=b->a; 368 } else col += in[j]%bs; 369 } else col = in[j]; 370 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 371 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 372 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 373 } 374 } 375 } else { 376 if (!baij->donotstash) { 377 if (roworiented) { 378 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 379 } else { 380 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 381 } 382 } 383 } 384 } 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 393 const MatScalar *value; 394 MatScalar *barray=baij->barray; 395 PetscTruth roworiented = baij->roworiented; 396 PetscErrorCode ierr; 397 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 398 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 399 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 400 401 PetscFunctionBegin; 402 if(!barray) { 403 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 404 baij->barray = barray; 405 } 406 407 if (roworiented) { 408 stepval = (n-1)*bs; 409 } else { 410 stepval = (m-1)*bs; 411 } 412 for (i=0; i<m; i++) { 413 if (im[i] < 0) continue; 414 #if defined(PETSC_USE_DEBUG) 415 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 416 #endif 417 if (im[i] >= rstart && im[i] < rend) { 418 row = im[i] - rstart; 419 for (j=0; j<n; j++) { 420 /* If NumCol = 1 then a copy is not required */ 421 if ((roworiented) && (n == 1)) { 422 barray = (MatScalar*)v + i*bs2; 423 } else if((!roworiented) && (m == 1)) { 424 barray = (MatScalar*)v + j*bs2; 425 } else { /* Here a copy is required */ 426 if (roworiented) { 427 value = v + i*(stepval+bs)*bs + j*bs; 428 } else { 429 value = v + j*(stepval+bs)*bs + i*bs; 430 } 431 for (ii=0; ii<bs; ii++,value+=stepval) { 432 for (jj=0; jj<bs; jj++) { 433 *barray++ = *value++; 434 } 435 } 436 barray -=bs2; 437 } 438 439 if (in[j] >= cstart && in[j] < cend){ 440 col = in[j] - cstart; 441 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 442 } 443 else if (in[j] < 0) continue; 444 #if defined(PETSC_USE_DEBUG) 445 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 446 #endif 447 else { 448 if (mat->was_assembled) { 449 if (!baij->colmap) { 450 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 451 } 452 453 #if defined(PETSC_USE_DEBUG) 454 #if defined (PETSC_USE_CTABLE) 455 { PetscInt data; 456 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 457 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 458 } 459 #else 460 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 461 #endif 462 #endif 463 #if defined (PETSC_USE_CTABLE) 464 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 465 col = (col - 1)/bs; 466 #else 467 col = (baij->colmap[in[j]] - 1)/bs; 468 #endif 469 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 470 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 471 col = in[j]; 472 } 473 } 474 else col = in[j]; 475 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 476 } 477 } 478 } else { 479 if (!baij->donotstash) { 480 if (roworiented) { 481 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 482 } else { 483 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 PetscFunctionReturn(0); 489 } 490 491 #define HASH_KEY 0.6180339887 492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 498 { 499 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 500 PetscTruth roworiented = baij->roworiented; 501 PetscErrorCode ierr; 502 PetscInt i,j,row,col; 503 PetscInt rstart_orig=baij->rstart_bs; 504 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 505 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 506 PetscReal tmp; 507 MatScalar **HD = baij->hd,value; 508 #if defined(PETSC_USE_DEBUG) 509 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 510 #endif 511 512 PetscFunctionBegin; 513 514 for (i=0; i<m; i++) { 515 #if defined(PETSC_USE_DEBUG) 516 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 517 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 518 #endif 519 row = im[i]; 520 if (row >= rstart_orig && row < rend_orig) { 521 for (j=0; j<n; j++) { 522 col = in[j]; 523 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 524 /* Look up PetscInto the Hash Table */ 525 key = (row/bs)*Nbs+(col/bs)+1; 526 h1 = HASH(size,key,tmp); 527 528 529 idx = h1; 530 #if defined(PETSC_USE_DEBUG) 531 insert_ct++; 532 total_ct++; 533 if (HT[idx] != key) { 534 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 535 if (idx == size) { 536 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 537 if (idx == h1) { 538 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 539 } 540 } 541 } 542 #else 543 if (HT[idx] != key) { 544 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 545 if (idx == size) { 546 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 547 if (idx == h1) { 548 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 549 } 550 } 551 } 552 #endif 553 /* A HASH table entry is found, so insert the values at the correct address */ 554 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 555 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 556 } 557 } else { 558 if (!baij->donotstash) { 559 if (roworiented) { 560 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 561 } else { 562 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 563 } 564 } 565 } 566 } 567 #if defined(PETSC_USE_DEBUG) 568 baij->ht_total_ct = total_ct; 569 baij->ht_insert_ct = insert_ct; 570 #endif 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 577 { 578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 579 PetscTruth roworiented = baij->roworiented; 580 PetscErrorCode ierr; 581 PetscInt i,j,ii,jj,row,col; 582 PetscInt rstart=baij->rstart ; 583 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 584 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 585 PetscReal tmp; 586 MatScalar **HD = baij->hd,*baij_a; 587 const MatScalar *v_t,*value; 588 #if defined(PETSC_USE_DEBUG) 589 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 590 #endif 591 592 PetscFunctionBegin; 593 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (i=0; i<m; i++) { 600 #if defined(PETSC_USE_DEBUG) 601 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 602 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 603 #endif 604 row = im[i]; 605 v_t = v + i*bs2; 606 if (row >= rstart && row < rend) { 607 for (j=0; j<n; j++) { 608 col = in[j]; 609 610 /* Look up into the Hash Table */ 611 key = row*Nbs+col+1; 612 h1 = HASH(size,key,tmp); 613 614 idx = h1; 615 #if defined(PETSC_USE_DEBUG) 616 total_ct++; 617 insert_ct++; 618 if (HT[idx] != key) { 619 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 620 if (idx == size) { 621 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 622 if (idx == h1) { 623 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 624 } 625 } 626 } 627 #else 628 if (HT[idx] != key) { 629 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 630 if (idx == size) { 631 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 632 if (idx == h1) { 633 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 634 } 635 } 636 } 637 #endif 638 baij_a = HD[idx]; 639 if (roworiented) { 640 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 641 /* value = v + (i*(stepval+bs)+j)*bs; */ 642 value = v_t; 643 v_t += bs; 644 if (addv == ADD_VALUES) { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=ii; jj<bs2; jj+=bs) { 647 baij_a[jj] += *value++; 648 } 649 } 650 } else { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=ii; jj<bs2; jj+=bs) { 653 baij_a[jj] = *value++; 654 } 655 } 656 } 657 } else { 658 value = v + j*(stepval+bs)*bs + i*bs; 659 if (addv == ADD_VALUES) { 660 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 661 for (jj=0; jj<bs; jj++) { 662 baij_a[jj] += *value++; 663 } 664 } 665 } else { 666 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 667 for (jj=0; jj<bs; jj++) { 668 baij_a[jj] = *value++; 669 } 670 } 671 } 672 } 673 } 674 } else { 675 if (!baij->donotstash) { 676 if (roworiented) { 677 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 678 } else { 679 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 680 } 681 } 682 } 683 } 684 #if defined(PETSC_USE_DEBUG) 685 baij->ht_total_ct = total_ct; 686 baij->ht_insert_ct = insert_ct; 687 #endif 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatGetValues_MPIBAIJ" 693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 694 { 695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 696 PetscErrorCode ierr; 697 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 698 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 699 700 PetscFunctionBegin; 701 for (i=0; i<m; i++) { 702 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 703 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 704 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 705 row = idxm[i] - bsrstart; 706 for (j=0; j<n; j++) { 707 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 708 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 709 if (idxn[j] >= bscstart && idxn[j] < bscend){ 710 col = idxn[j] - bscstart; 711 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 712 } else { 713 if (!baij->colmap) { 714 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 715 } 716 #if defined (PETSC_USE_CTABLE) 717 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 718 data --; 719 #else 720 data = baij->colmap[idxn[j]/bs]-1; 721 #endif 722 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 723 else { 724 col = data + idxn[j]%bs; 725 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 726 } 727 } 728 } 729 } else { 730 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 731 } 732 } 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatNorm_MPIBAIJ" 738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 739 { 740 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 741 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 742 PetscErrorCode ierr; 743 PetscInt i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col; 744 PetscReal sum = 0.0; 745 MatScalar *v; 746 747 PetscFunctionBegin; 748 if (baij->size == 1) { 749 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 750 } else { 751 if (type == NORM_FROBENIUS) { 752 v = amat->a; 753 nz = amat->nz*bs2; 754 for (i=0; i<nz; i++) { 755 #if defined(PETSC_USE_COMPLEX) 756 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 757 #else 758 sum += (*v)*(*v); v++; 759 #endif 760 } 761 v = bmat->a; 762 nz = bmat->nz*bs2; 763 for (i=0; i<nz; i++) { 764 #if defined(PETSC_USE_COMPLEX) 765 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 766 #else 767 sum += (*v)*(*v); v++; 768 #endif 769 } 770 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 771 *nrm = sqrt(*nrm); 772 } else if (type == NORM_1) { /* max column sum */ 773 PetscReal *tmp,*tmp2; 774 PetscInt *jj,*garray=baij->garray,cstart=baij->cstart; 775 ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 776 tmp2 = tmp + mat->N; 777 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 778 v = amat->a; jj = amat->j; 779 for (i=0; i<amat->nz; i++) { 780 for (j=0; j<bs; j++){ 781 col = bs*(cstart + *jj) + j; /* column index */ 782 for (row=0; row<bs; row++){ 783 tmp[col] += PetscAbsScalar(*v); v++; 784 } 785 } 786 jj++; 787 } 788 v = bmat->a; jj = bmat->j; 789 for (i=0; i<bmat->nz; i++) { 790 for (j=0; j<bs; j++){ 791 col = bs*garray[*jj] + j; 792 for (row=0; row<bs; row++){ 793 tmp[col] += PetscAbsScalar(*v); v++; 794 } 795 } 796 jj++; 797 } 798 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 799 *nrm = 0.0; 800 for (j=0; j<mat->N; j++) { 801 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 802 } 803 ierr = PetscFree(tmp);CHKERRQ(ierr); 804 } else if (type == NORM_INFINITY) { /* max row sum */ 805 PetscReal *sums; 806 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 807 sum = 0.0; 808 for (j=0; j<amat->mbs; j++) { 809 for (row=0; row<bs; row++) sums[row] = 0.0; 810 v = amat->a + bs2*amat->i[j]; 811 nz = amat->i[j+1]-amat->i[j]; 812 for (i=0; i<nz; i++) { 813 for (col=0; col<bs; col++){ 814 for (row=0; row<bs; row++){ 815 sums[row] += PetscAbsScalar(*v); v++; 816 } 817 } 818 } 819 v = bmat->a + bs2*bmat->i[j]; 820 nz = bmat->i[j+1]-bmat->i[j]; 821 for (i=0; i<nz; i++) { 822 for (col=0; col<bs; col++){ 823 for (row=0; row<bs; row++){ 824 sums[row] += PetscAbsScalar(*v); v++; 825 } 826 } 827 } 828 for (row=0; row<bs; row++){ 829 if (sums[row] > sum) sum = sums[row]; 830 } 831 } 832 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 833 ierr = PetscFree(sums);CHKERRQ(ierr); 834 } else { 835 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 836 } 837 } 838 PetscFunctionReturn(0); 839 } 840 841 /* 842 Creates the hash table, and sets the table 843 This table is created only once. 844 If new entried need to be added to the matrix 845 then the hash table has to be destroyed and 846 recreated. 847 */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 851 { 852 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 853 Mat A = baij->A,B=baij->B; 854 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 855 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 856 PetscErrorCode ierr; 857 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 858 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 859 PetscInt *HT,key; 860 MatScalar **HD; 861 PetscReal tmp; 862 #if defined(PETSC_USE_VERBOSE) 863 PetscInt ct=0,max=0; 864 #endif 865 866 PetscFunctionBegin; 867 baij->ht_size=(PetscInt)(factor*nz); 868 size = baij->ht_size; 869 870 if (baij->ht) { 871 PetscFunctionReturn(0); 872 } 873 874 /* Allocate Memory for Hash Table */ 875 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 876 baij->ht = (PetscInt*)(baij->hd + size); 877 HD = baij->hd; 878 HT = baij->ht; 879 880 881 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 882 883 884 /* Loop Over A */ 885 for (i=0; i<a->mbs; i++) { 886 for (j=ai[i]; j<ai[i+1]; j++) { 887 row = i+rstart; 888 col = aj[j]+cstart; 889 890 key = row*Nbs + col + 1; 891 h1 = HASH(size,key,tmp); 892 for (k=0; k<size; k++){ 893 if (!HT[(h1+k)%size]) { 894 HT[(h1+k)%size] = key; 895 HD[(h1+k)%size] = a->a + j*bs2; 896 break; 897 #if defined(PETSC_USE_VERBOSE) 898 } else { 899 ct++; 900 #endif 901 } 902 } 903 #if defined(PETSC_USE_VERBOSE) 904 if (k> max) max = k; 905 #endif 906 } 907 } 908 /* Loop Over B */ 909 for (i=0; i<b->mbs; i++) { 910 for (j=bi[i]; j<bi[i+1]; j++) { 911 row = i+rstart; 912 col = garray[bj[j]]; 913 key = row*Nbs + col + 1; 914 h1 = HASH(size,key,tmp); 915 for (k=0; k<size; k++){ 916 if (!HT[(h1+k)%size]) { 917 HT[(h1+k)%size] = key; 918 HD[(h1+k)%size] = b->a + j*bs2; 919 break; 920 #if defined(PETSC_USE_VERBOSE) 921 } else { 922 ct++; 923 #endif 924 } 925 } 926 #if defined(PETSC_USE_VERBOSE) 927 if (k> max) max = k; 928 #endif 929 } 930 } 931 932 /* Print Summary */ 933 #if defined(PETSC_USE_VERBOSE) 934 for (i=0,j=0; i<size; i++) { 935 if (HT[i]) {j++;} 936 } 937 ierr = PetscVerboseInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr); 938 #endif 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 945 { 946 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 947 PetscErrorCode ierr; 948 PetscInt nstash,reallocs; 949 InsertMode addv; 950 951 PetscFunctionBegin; 952 if (baij->donotstash) { 953 PetscFunctionReturn(0); 954 } 955 956 /* make sure all processors are either in INSERTMODE or ADDMODE */ 957 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 958 if (addv == (ADD_VALUES|INSERT_VALUES)) { 959 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 960 } 961 mat->insertmode = addv; /* in case this processor had no cache */ 962 963 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 964 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 965 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 966 ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 967 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 968 ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 975 { 976 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 977 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 978 PetscErrorCode ierr; 979 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 980 PetscInt *row,*col,other_disassembled; 981 PetscTruth r1,r2,r3; 982 MatScalar *val; 983 InsertMode addv = mat->insertmode; 984 PetscMPIInt n; 985 986 PetscFunctionBegin; 987 if (!baij->donotstash) { 988 while (1) { 989 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 990 if (!flg) break; 991 992 for (i=0; i<n;) { 993 /* Now identify the consecutive vals belonging to the same row */ 994 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995 if (j < n) ncols = j-i; 996 else ncols = n-i; 997 /* Now assemble all these values with a single function call */ 998 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999 i = j; 1000 } 1001 } 1002 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003 /* Now process the block-stash. Since the values are stashed column-oriented, 1004 set the roworiented flag to column oriented, and after MatSetValues() 1005 restore the original flags */ 1006 r1 = baij->roworiented; 1007 r2 = a->roworiented; 1008 r3 = b->roworiented; 1009 baij->roworiented = PETSC_FALSE; 1010 a->roworiented = PETSC_FALSE; 1011 b->roworiented = PETSC_FALSE; 1012 while (1) { 1013 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014 if (!flg) break; 1015 1016 for (i=0; i<n;) { 1017 /* Now identify the consecutive vals belonging to the same row */ 1018 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019 if (j < n) ncols = j-i; 1020 else ncols = n-i; 1021 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022 i = j; 1023 } 1024 } 1025 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026 baij->roworiented = r1; 1027 a->roworiented = r2; 1028 b->roworiented = r3; 1029 } 1030 1031 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033 1034 /* determine if any processor has disassembled, if so we must 1035 also disassemble ourselfs, in order that we may reassemble. */ 1036 /* 1037 if nonzero structure of submatrix B cannot change then we know that 1038 no processor disassembled thus we can skip this stuff 1039 */ 1040 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042 if (mat->was_assembled && !other_disassembled) { 1043 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1044 } 1045 } 1046 1047 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1049 } 1050 b->compressedrow.use = PETSC_TRUE; 1051 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1053 1054 #if defined(PETSC_USE_VERBOSE) 1055 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1056 ierr = PetscVerboseInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr); 1057 baij->ht_total_ct = 0; 1058 baij->ht_insert_ct = 0; 1059 } 1060 #endif 1061 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1062 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1063 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1064 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1065 } 1066 1067 if (baij->rowvalues) { 1068 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1069 baij->rowvalues = 0; 1070 } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1076 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1077 { 1078 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1079 PetscErrorCode ierr; 1080 PetscMPIInt size = baij->size,rank = baij->rank; 1081 PetscInt bs = mat->bs; 1082 PetscTruth iascii,isdraw; 1083 PetscViewer sviewer; 1084 PetscViewerFormat format; 1085 1086 PetscFunctionBegin; 1087 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1088 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1089 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1090 if (iascii) { 1091 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1092 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1093 MatInfo info; 1094 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1095 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1096 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1097 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1098 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1099 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1100 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1101 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1102 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1103 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1104 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1107 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1110 PetscFunctionReturn(0); 1111 } 1112 } 1113 1114 if (isdraw) { 1115 PetscDraw draw; 1116 PetscTruth isnull; 1117 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1118 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1119 } 1120 1121 if (size == 1) { 1122 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1123 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1124 } else { 1125 /* assemble the entire matrix onto first processor. */ 1126 Mat A; 1127 Mat_SeqBAIJ *Aloc; 1128 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1129 MatScalar *a; 1130 1131 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1132 /* Perhaps this should be the type of mat? */ 1133 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 1134 if (!rank) { 1135 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1136 } else { 1137 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1138 } 1139 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1140 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1141 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1142 1143 /* copy over the A part */ 1144 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1145 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1146 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1147 1148 for (i=0; i<mbs; i++) { 1149 rvals[0] = bs*(baij->rstart + i); 1150 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1151 for (j=ai[i]; j<ai[i+1]; j++) { 1152 col = (baij->cstart+aj[j])*bs; 1153 for (k=0; k<bs; k++) { 1154 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1155 col++; a += bs; 1156 } 1157 } 1158 } 1159 /* copy over the B part */ 1160 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1161 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1162 for (i=0; i<mbs; i++) { 1163 rvals[0] = bs*(baij->rstart + i); 1164 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1165 for (j=ai[i]; j<ai[i+1]; j++) { 1166 col = baij->garray[aj[j]]*bs; 1167 for (k=0; k<bs; k++) { 1168 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1169 col++; a += bs; 1170 } 1171 } 1172 } 1173 ierr = PetscFree(rvals);CHKERRQ(ierr); 1174 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1175 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1176 /* 1177 Everyone has to call to draw the matrix since the graphics waits are 1178 synchronized across all processors that share the PetscDraw object 1179 */ 1180 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1181 if (!rank) { 1182 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1183 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1184 } 1185 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1186 ierr = MatDestroy(A);CHKERRQ(ierr); 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatView_MPIBAIJ" 1193 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1194 { 1195 PetscErrorCode ierr; 1196 PetscTruth iascii,isdraw,issocket,isbinary; 1197 1198 PetscFunctionBegin; 1199 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1200 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1201 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1203 if (iascii || isdraw || issocket || isbinary) { 1204 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1205 } else { 1206 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1207 } 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1213 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1214 { 1215 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 #if defined(PETSC_USE_LOG) 1220 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1221 #endif 1222 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1223 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1224 ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1225 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1226 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1227 #if defined (PETSC_USE_CTABLE) 1228 if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1229 #else 1230 if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1231 #endif 1232 if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1233 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1234 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1235 if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1236 if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1237 if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1238 #if defined(PETSC_USE_MAT_SINGLE) 1239 if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1240 #endif 1241 ierr = PetscFree(baij);CHKERRQ(ierr); 1242 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatMult_MPIBAIJ" 1255 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1256 { 1257 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 PetscInt nt; 1260 1261 PetscFunctionBegin; 1262 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1263 if (nt != A->n) { 1264 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1265 } 1266 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1267 if (nt != A->m) { 1268 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1269 } 1270 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1271 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1273 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1274 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1280 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1281 { 1282 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1287 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1288 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1289 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1295 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1296 { 1297 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1298 PetscErrorCode ierr; 1299 PetscTruth merged; 1300 1301 PetscFunctionBegin; 1302 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1303 /* do nondiagonal part */ 1304 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1305 if (!merged) { 1306 /* send it on its way */ 1307 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1308 /* do local part */ 1309 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1310 /* receive remote parts: note this assumes the values are not actually */ 1311 /* inserted in yy until the next line */ 1312 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1313 } else { 1314 /* do local part */ 1315 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1316 /* send it on its way */ 1317 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1318 /* values actually were received in the Begin() but we need to call this nop */ 1319 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1326 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 /* do nondiagonal part */ 1333 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1334 /* send it on its way */ 1335 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1336 /* do local part */ 1337 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1338 /* receive remote parts: note this assumes the values are not actually */ 1339 /* inserted in yy until the next line, which is true for my implementation*/ 1340 /* but is not perhaps always true. */ 1341 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /* 1346 This only works correctly for square matrices where the subblock A->A is the 1347 diagonal block 1348 */ 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1351 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1352 { 1353 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1358 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatScale_MPIBAIJ" 1364 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1365 { 1366 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1371 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1377 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1378 { 1379 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1380 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1381 PetscErrorCode ierr; 1382 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1383 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1384 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1385 1386 PetscFunctionBegin; 1387 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1388 mat->getrowactive = PETSC_TRUE; 1389 1390 if (!mat->rowvalues && (idx || v)) { 1391 /* 1392 allocate enough space to hold information from the longest row. 1393 */ 1394 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1395 PetscInt max = 1,mbs = mat->mbs,tmp; 1396 for (i=0; i<mbs; i++) { 1397 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1398 if (max < tmp) { max = tmp; } 1399 } 1400 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1401 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1402 } 1403 1404 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1405 lrow = row - brstart; 1406 1407 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1408 if (!v) {pvA = 0; pvB = 0;} 1409 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1410 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1411 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1412 nztot = nzA + nzB; 1413 1414 cmap = mat->garray; 1415 if (v || idx) { 1416 if (nztot) { 1417 /* Sort by increasing column numbers, assuming A and B already sorted */ 1418 PetscInt imark = -1; 1419 if (v) { 1420 *v = v_p = mat->rowvalues; 1421 for (i=0; i<nzB; i++) { 1422 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1423 else break; 1424 } 1425 imark = i; 1426 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1427 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1428 } 1429 if (idx) { 1430 *idx = idx_p = mat->rowindices; 1431 if (imark > -1) { 1432 for (i=0; i<imark; i++) { 1433 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1434 } 1435 } else { 1436 for (i=0; i<nzB; i++) { 1437 if (cmap[cworkB[i]/bs] < cstart) 1438 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1439 else break; 1440 } 1441 imark = i; 1442 } 1443 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1444 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1445 } 1446 } else { 1447 if (idx) *idx = 0; 1448 if (v) *v = 0; 1449 } 1450 } 1451 *nz = nztot; 1452 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1453 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1459 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1460 { 1461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1462 1463 PetscFunctionBegin; 1464 if (!baij->getrowactive) { 1465 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1466 } 1467 baij->getrowactive = PETSC_FALSE; 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1474 { 1475 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1480 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1487 { 1488 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1489 Mat A = a->A,B = a->B; 1490 PetscErrorCode ierr; 1491 PetscReal isend[5],irecv[5]; 1492 1493 PetscFunctionBegin; 1494 info->block_size = (PetscReal)matin->bs; 1495 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1496 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1497 isend[3] = info->memory; isend[4] = info->mallocs; 1498 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1499 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1500 isend[3] += info->memory; isend[4] += info->mallocs; 1501 if (flag == MAT_LOCAL) { 1502 info->nz_used = isend[0]; 1503 info->nz_allocated = isend[1]; 1504 info->nz_unneeded = isend[2]; 1505 info->memory = isend[3]; 1506 info->mallocs = isend[4]; 1507 } else if (flag == MAT_GLOBAL_MAX) { 1508 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1509 info->nz_used = irecv[0]; 1510 info->nz_allocated = irecv[1]; 1511 info->nz_unneeded = irecv[2]; 1512 info->memory = irecv[3]; 1513 info->mallocs = irecv[4]; 1514 } else if (flag == MAT_GLOBAL_SUM) { 1515 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1516 info->nz_used = irecv[0]; 1517 info->nz_allocated = irecv[1]; 1518 info->nz_unneeded = irecv[2]; 1519 info->memory = irecv[3]; 1520 info->mallocs = irecv[4]; 1521 } else { 1522 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1523 } 1524 info->rows_global = (PetscReal)A->M; 1525 info->columns_global = (PetscReal)A->N; 1526 info->rows_local = (PetscReal)A->m; 1527 info->columns_local = (PetscReal)A->N; 1528 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1529 info->fill_ratio_needed = 0; 1530 info->factor_mallocs = 0; 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1536 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1537 { 1538 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 switch (op) { 1543 case MAT_NO_NEW_NONZERO_LOCATIONS: 1544 case MAT_YES_NEW_NONZERO_LOCATIONS: 1545 case MAT_COLUMNS_UNSORTED: 1546 case MAT_COLUMNS_SORTED: 1547 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1548 case MAT_KEEP_ZEROED_ROWS: 1549 case MAT_NEW_NONZERO_LOCATION_ERR: 1550 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1551 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1552 break; 1553 case MAT_ROW_ORIENTED: 1554 a->roworiented = PETSC_TRUE; 1555 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1556 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1557 break; 1558 case MAT_ROWS_SORTED: 1559 case MAT_ROWS_UNSORTED: 1560 case MAT_YES_NEW_DIAGONALS: 1561 ierr = PetscVerboseInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr); 1562 break; 1563 case MAT_COLUMN_ORIENTED: 1564 a->roworiented = PETSC_FALSE; 1565 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1566 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1567 break; 1568 case MAT_IGNORE_OFF_PROC_ENTRIES: 1569 a->donotstash = PETSC_TRUE; 1570 break; 1571 case MAT_NO_NEW_DIAGONALS: 1572 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1573 case MAT_USE_HASH_TABLE: 1574 a->ht_flag = PETSC_TRUE; 1575 break; 1576 case MAT_SYMMETRIC: 1577 case MAT_STRUCTURALLY_SYMMETRIC: 1578 case MAT_HERMITIAN: 1579 case MAT_SYMMETRY_ETERNAL: 1580 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1581 break; 1582 case MAT_NOT_SYMMETRIC: 1583 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1584 case MAT_NOT_HERMITIAN: 1585 case MAT_NOT_SYMMETRY_ETERNAL: 1586 break; 1587 default: 1588 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1596 { 1597 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1598 Mat_SeqBAIJ *Aloc; 1599 Mat B; 1600 PetscErrorCode ierr; 1601 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1602 PetscInt bs=A->bs,mbs=baij->mbs; 1603 MatScalar *a; 1604 1605 PetscFunctionBegin; 1606 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1607 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1608 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1609 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1610 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1611 1612 /* copy over the A part */ 1613 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1614 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1615 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1616 1617 for (i=0; i<mbs; i++) { 1618 rvals[0] = bs*(baij->rstart + i); 1619 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1620 for (j=ai[i]; j<ai[i+1]; j++) { 1621 col = (baij->cstart+aj[j])*bs; 1622 for (k=0; k<bs; k++) { 1623 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1624 col++; a += bs; 1625 } 1626 } 1627 } 1628 /* copy over the B part */ 1629 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1630 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1631 for (i=0; i<mbs; i++) { 1632 rvals[0] = bs*(baij->rstart + i); 1633 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1634 for (j=ai[i]; j<ai[i+1]; j++) { 1635 col = baij->garray[aj[j]]*bs; 1636 for (k=0; k<bs; k++) { 1637 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1638 col++; a += bs; 1639 } 1640 } 1641 } 1642 ierr = PetscFree(rvals);CHKERRQ(ierr); 1643 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1645 1646 if (matout) { 1647 *matout = B; 1648 } else { 1649 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1650 } 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1656 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1657 { 1658 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1659 Mat a = baij->A,b = baij->B; 1660 PetscErrorCode ierr; 1661 PetscInt s1,s2,s3; 1662 1663 PetscFunctionBegin; 1664 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1665 if (rr) { 1666 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1667 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1668 /* Overlap communication with computation. */ 1669 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1670 } 1671 if (ll) { 1672 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1673 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1674 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1675 } 1676 /* scale the diagonal block */ 1677 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1678 1679 if (rr) { 1680 /* Do a scatter end and then right scale the off-diagonal block */ 1681 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1682 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1683 } 1684 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1690 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1691 { 1692 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1695 PetscInt i,*owners = l->rowners; 1696 PetscInt *nprocs,j,idx,nsends,row; 1697 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1698 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1699 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1700 MPI_Comm comm = A->comm; 1701 MPI_Request *send_waits,*recv_waits; 1702 MPI_Status recv_status,*send_status; 1703 #if defined(PETSC_DEBUG) 1704 PetscTruth found = PETSC_FALSE; 1705 #endif 1706 1707 PetscFunctionBegin; 1708 /* first count number of contributors to each processor */ 1709 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1710 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1711 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1712 j = 0; 1713 for (i=0; i<N; i++) { 1714 if (lastidx > (idx = rows[i])) j = 0; 1715 lastidx = idx; 1716 for (; j<size; j++) { 1717 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1718 nprocs[2*j]++; 1719 nprocs[2*j+1] = 1; 1720 owner[i] = j; 1721 #if defined(PETSC_DEBUG) 1722 found = PETSC_TRUE; 1723 #endif 1724 break; 1725 } 1726 } 1727 #if defined(PETSC_DEBUG) 1728 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1729 found = PETSC_FALSE; 1730 #endif 1731 } 1732 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1733 1734 /* inform other processors of number of messages and max length*/ 1735 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1736 1737 /* post receives: */ 1738 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1739 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1740 for (i=0; i<nrecvs; i++) { 1741 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1742 } 1743 1744 /* do sends: 1745 1) starts[i] gives the starting index in svalues for stuff going to 1746 the ith processor 1747 */ 1748 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1749 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1750 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1751 starts[0] = 0; 1752 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1753 for (i=0; i<N; i++) { 1754 svalues[starts[owner[i]]++] = rows[i]; 1755 } 1756 1757 starts[0] = 0; 1758 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1759 count = 0; 1760 for (i=0; i<size; i++) { 1761 if (nprocs[2*i+1]) { 1762 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1763 } 1764 } 1765 ierr = PetscFree(starts);CHKERRQ(ierr); 1766 1767 base = owners[rank]*bs; 1768 1769 /* wait on receives */ 1770 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1771 source = lens + nrecvs; 1772 count = nrecvs; slen = 0; 1773 while (count) { 1774 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1775 /* unpack receives into our local space */ 1776 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1777 source[imdex] = recv_status.MPI_SOURCE; 1778 lens[imdex] = n; 1779 slen += n; 1780 count--; 1781 } 1782 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1783 1784 /* move the data into the send scatter */ 1785 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1786 count = 0; 1787 for (i=0; i<nrecvs; i++) { 1788 values = rvalues + i*nmax; 1789 for (j=0; j<lens[i]; j++) { 1790 lrows[count++] = values[j] - base; 1791 } 1792 } 1793 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1794 ierr = PetscFree(lens);CHKERRQ(ierr); 1795 ierr = PetscFree(owner);CHKERRQ(ierr); 1796 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1797 1798 /* actually zap the local rows */ 1799 /* 1800 Zero the required rows. If the "diagonal block" of the matrix 1801 is square and the user wishes to set the diagonal we use separate 1802 code so that MatSetValues() is not called for each diagonal allocating 1803 new memory, thus calling lots of mallocs and slowing things down. 1804 1805 Contributed by: Matthew Knepley 1806 */ 1807 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1808 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1809 if ((diag != 0.0) && (l->A->M == l->A->N)) { 1810 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1811 } else if (diag != 0.0) { 1812 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1813 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1814 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1815 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1816 } 1817 for (i=0; i<slen; i++) { 1818 row = lrows[i] + rstart_bs; 1819 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1820 } 1821 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1822 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1823 } else { 1824 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1825 } 1826 1827 ierr = PetscFree(lrows);CHKERRQ(ierr); 1828 1829 /* wait on sends */ 1830 if (nsends) { 1831 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1832 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1833 ierr = PetscFree(send_status);CHKERRQ(ierr); 1834 } 1835 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1836 ierr = PetscFree(svalues);CHKERRQ(ierr); 1837 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1843 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1844 { 1845 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1846 MPI_Comm comm = A->comm; 1847 static PetscTruth called = PETSC_FALSE; 1848 PetscErrorCode ierr; 1849 1850 PetscFunctionBegin; 1851 if (!a->rank) { 1852 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1853 } 1854 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1855 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1856 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1862 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1863 { 1864 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1865 PetscErrorCode ierr; 1866 1867 PetscFunctionBegin; 1868 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatEqual_MPIBAIJ" 1876 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1877 { 1878 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1879 Mat a,b,c,d; 1880 PetscTruth flg; 1881 PetscErrorCode ierr; 1882 1883 PetscFunctionBegin; 1884 a = matA->A; b = matA->B; 1885 c = matB->A; d = matB->B; 1886 1887 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1888 if (flg) { 1889 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1890 } 1891 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatCopy_MPIBAIJ" 1897 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1898 { 1899 PetscErrorCode ierr; 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1901 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1902 1903 PetscFunctionBegin; 1904 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1905 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1906 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1907 } else { 1908 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1909 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1916 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1917 { 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #include "petscblaslapack.h" 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1928 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1929 { 1930 PetscErrorCode ierr; 1931 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1932 PetscBLASInt bnz,one=1; 1933 Mat_SeqBAIJ *x,*y; 1934 1935 PetscFunctionBegin; 1936 if (str == SAME_NONZERO_PATTERN) { 1937 PetscScalar alpha = a; 1938 x = (Mat_SeqBAIJ *)xx->A->data; 1939 y = (Mat_SeqBAIJ *)yy->A->data; 1940 bnz = (PetscBLASInt)x->nz; 1941 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1942 x = (Mat_SeqBAIJ *)xx->B->data; 1943 y = (Mat_SeqBAIJ *)yy->B->data; 1944 bnz = (PetscBLASInt)x->nz; 1945 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1946 } else { 1947 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1954 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1955 { 1956 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1961 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1967 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1968 { 1969 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1974 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1975 PetscFunctionReturn(0); 1976 } 1977 1978 /* -------------------------------------------------------------------*/ 1979 static struct _MatOps MatOps_Values = { 1980 MatSetValues_MPIBAIJ, 1981 MatGetRow_MPIBAIJ, 1982 MatRestoreRow_MPIBAIJ, 1983 MatMult_MPIBAIJ, 1984 /* 4*/ MatMultAdd_MPIBAIJ, 1985 MatMultTranspose_MPIBAIJ, 1986 MatMultTransposeAdd_MPIBAIJ, 1987 0, 1988 0, 1989 0, 1990 /*10*/ 0, 1991 0, 1992 0, 1993 0, 1994 MatTranspose_MPIBAIJ, 1995 /*15*/ MatGetInfo_MPIBAIJ, 1996 MatEqual_MPIBAIJ, 1997 MatGetDiagonal_MPIBAIJ, 1998 MatDiagonalScale_MPIBAIJ, 1999 MatNorm_MPIBAIJ, 2000 /*20*/ MatAssemblyBegin_MPIBAIJ, 2001 MatAssemblyEnd_MPIBAIJ, 2002 0, 2003 MatSetOption_MPIBAIJ, 2004 MatZeroEntries_MPIBAIJ, 2005 /*25*/ MatZeroRows_MPIBAIJ, 2006 0, 2007 0, 2008 0, 2009 0, 2010 /*30*/ MatSetUpPreallocation_MPIBAIJ, 2011 0, 2012 0, 2013 0, 2014 0, 2015 /*35*/ MatDuplicate_MPIBAIJ, 2016 0, 2017 0, 2018 0, 2019 0, 2020 /*40*/ MatAXPY_MPIBAIJ, 2021 MatGetSubMatrices_MPIBAIJ, 2022 MatIncreaseOverlap_MPIBAIJ, 2023 MatGetValues_MPIBAIJ, 2024 MatCopy_MPIBAIJ, 2025 /*45*/ MatPrintHelp_MPIBAIJ, 2026 MatScale_MPIBAIJ, 2027 0, 2028 0, 2029 0, 2030 /*50*/ 0, 2031 0, 2032 0, 2033 0, 2034 0, 2035 /*55*/ 0, 2036 0, 2037 MatSetUnfactored_MPIBAIJ, 2038 0, 2039 MatSetValuesBlocked_MPIBAIJ, 2040 /*60*/ 0, 2041 MatDestroy_MPIBAIJ, 2042 MatView_MPIBAIJ, 2043 MatGetPetscMaps_Petsc, 2044 0, 2045 /*65*/ 0, 2046 0, 2047 0, 2048 0, 2049 0, 2050 /*70*/ MatGetRowMax_MPIBAIJ, 2051 0, 2052 0, 2053 0, 2054 0, 2055 /*75*/ 0, 2056 0, 2057 0, 2058 0, 2059 0, 2060 /*80*/ 0, 2061 0, 2062 0, 2063 0, 2064 MatLoad_MPIBAIJ, 2065 /*85*/ 0, 2066 0, 2067 0, 2068 0, 2069 0, 2070 /*90*/ 0, 2071 0, 2072 0, 2073 0, 2074 0, 2075 /*95*/ 0, 2076 0, 2077 0, 2078 0, 2079 0, 2080 /*100*/0, 2081 0, 2082 0, 2083 0, 2084 0, 2085 /*105*/0, 2086 MatRealPart_MPIBAIJ, 2087 MatImaginaryPart_MPIBAIJ}; 2088 2089 2090 EXTERN_C_BEGIN 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2093 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2094 { 2095 PetscFunctionBegin; 2096 *a = ((Mat_MPIBAIJ *)A->data)->A; 2097 *iscopy = PETSC_FALSE; 2098 PetscFunctionReturn(0); 2099 } 2100 EXTERN_C_END 2101 2102 EXTERN_C_BEGIN 2103 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2104 EXTERN_C_END 2105 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2108 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2109 { 2110 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2111 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2112 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2113 const PetscInt *JJ; 2114 PetscScalar *values; 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 #if defined(PETSC_OPT_g) 2119 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2120 #endif 2121 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2122 o_nnz = d_nnz + m; 2123 2124 for (i=0; i<m; i++) { 2125 nnz = I[i+1]- I[i]; 2126 JJ = J + I[i]; 2127 nnz_max = PetscMax(nnz_max,nnz); 2128 #if defined(PETSC_OPT_g) 2129 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2130 #endif 2131 for (j=0; j<nnz; j++) { 2132 if (*JJ >= cstart) break; 2133 JJ++; 2134 } 2135 d = 0; 2136 for (; j<nnz; j++) { 2137 if (*JJ++ >= cend) break; 2138 d++; 2139 } 2140 d_nnz[i] = d; 2141 o_nnz[i] = nnz - d; 2142 } 2143 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2144 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2145 2146 if (v) values = (PetscScalar*)v; 2147 else { 2148 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2149 ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2150 } 2151 2152 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2153 for (i=0; i<m; i++) { 2154 ii = i + rstart; 2155 nnz = I[i+1]- I[i]; 2156 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2157 } 2158 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2161 2162 if (!v) { 2163 ierr = PetscFree(values);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2170 /*@C 2171 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2172 (the default parallel PETSc format). 2173 2174 Collective on MPI_Comm 2175 2176 Input Parameters: 2177 + A - the matrix 2178 . i - the indices into j for the start of each local row (starts with zero) 2179 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2180 - v - optional values in the matrix 2181 2182 Level: developer 2183 2184 .keywords: matrix, aij, compressed row, sparse, parallel 2185 2186 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2187 @*/ 2188 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2189 { 2190 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2191 2192 PetscFunctionBegin; 2193 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2194 if (f) { 2195 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 EXTERN_C_BEGIN 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2203 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2204 { 2205 Mat_MPIBAIJ *b; 2206 PetscErrorCode ierr; 2207 PetscInt i; 2208 2209 PetscFunctionBegin; 2210 B->preallocated = PETSC_TRUE; 2211 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2212 2213 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2214 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2215 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2216 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2217 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2218 if (d_nnz) { 2219 for (i=0; i<B->m/bs; i++) { 2220 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2221 } 2222 } 2223 if (o_nnz) { 2224 for (i=0; i<B->m/bs; i++) { 2225 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2226 } 2227 } 2228 2229 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr); 2230 ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr); 2231 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 2232 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2233 2234 b = (Mat_MPIBAIJ*)B->data; 2235 B->bs = bs; 2236 b->bs2 = bs*bs; 2237 b->mbs = B->m/bs; 2238 b->nbs = B->n/bs; 2239 b->Mbs = B->M/bs; 2240 b->Nbs = B->N/bs; 2241 2242 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2243 b->rowners[0] = 0; 2244 for (i=2; i<=b->size; i++) { 2245 b->rowners[i] += b->rowners[i-1]; 2246 } 2247 b->rstart = b->rowners[b->rank]; 2248 b->rend = b->rowners[b->rank+1]; 2249 2250 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2251 b->cowners[0] = 0; 2252 for (i=2; i<=b->size; i++) { 2253 b->cowners[i] += b->cowners[i-1]; 2254 } 2255 b->cstart = b->cowners[b->rank]; 2256 b->cend = b->cowners[b->rank+1]; 2257 2258 for (i=0; i<=b->size; i++) { 2259 b->rowners_bs[i] = b->rowners[i]*bs; 2260 } 2261 b->rstart_bs = b->rstart*bs; 2262 b->rend_bs = b->rend*bs; 2263 b->cstart_bs = b->cstart*bs; 2264 b->cend_bs = b->cend*bs; 2265 2266 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2267 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 2268 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2269 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2270 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2271 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2272 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 2273 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2274 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2275 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2276 2277 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2278 2279 PetscFunctionReturn(0); 2280 } 2281 EXTERN_C_END 2282 2283 EXTERN_C_BEGIN 2284 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2285 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2286 EXTERN_C_END 2287 2288 /*MC 2289 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2290 2291 Options Database Keys: 2292 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2293 2294 Level: beginner 2295 2296 .seealso: MatCreateMPIBAIJ 2297 M*/ 2298 2299 EXTERN_C_BEGIN 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatCreate_MPIBAIJ" 2302 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2303 { 2304 Mat_MPIBAIJ *b; 2305 PetscErrorCode ierr; 2306 PetscTruth flg; 2307 2308 PetscFunctionBegin; 2309 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2310 B->data = (void*)b; 2311 2312 2313 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2314 B->mapping = 0; 2315 B->factor = 0; 2316 B->assembled = PETSC_FALSE; 2317 2318 B->insertmode = NOT_SET_VALUES; 2319 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2320 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2321 2322 /* build local table of row and column ownerships */ 2323 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2324 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2325 b->cowners = b->rowners + b->size + 2; 2326 b->rowners_bs = b->cowners + b->size + 2; 2327 2328 /* build cache for off array entries formed */ 2329 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2330 b->donotstash = PETSC_FALSE; 2331 b->colmap = PETSC_NULL; 2332 b->garray = PETSC_NULL; 2333 b->roworiented = PETSC_TRUE; 2334 2335 #if defined(PETSC_USE_MAT_SINGLE) 2336 /* stuff for MatSetValues_XXX in single precision */ 2337 b->setvalueslen = 0; 2338 b->setvaluescopy = PETSC_NULL; 2339 #endif 2340 2341 /* stuff used in block assembly */ 2342 b->barray = 0; 2343 2344 /* stuff used for matrix vector multiply */ 2345 b->lvec = 0; 2346 b->Mvctx = 0; 2347 2348 /* stuff for MatGetRow() */ 2349 b->rowindices = 0; 2350 b->rowvalues = 0; 2351 b->getrowactive = PETSC_FALSE; 2352 2353 /* hash table stuff */ 2354 b->ht = 0; 2355 b->hd = 0; 2356 b->ht_size = 0; 2357 b->ht_flag = PETSC_FALSE; 2358 b->ht_fact = 0; 2359 b->ht_total_ct = 0; 2360 b->ht_insert_ct = 0; 2361 2362 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2363 if (flg) { 2364 PetscReal fact = 1.39; 2365 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2366 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2367 if (fact <= 1.0) fact = 1.39; 2368 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2369 ierr = PetscVerboseInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr); 2370 } 2371 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2372 "MatStoreValues_MPIBAIJ", 2373 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2375 "MatRetrieveValues_MPIBAIJ", 2376 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2378 "MatGetDiagonalBlock_MPIBAIJ", 2379 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2380 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2381 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2382 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2383 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2384 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2385 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2386 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2387 "MatDiagonalScaleLocal_MPIBAIJ", 2388 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2389 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2390 "MatSetHashTableFactor_MPIBAIJ", 2391 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 EXTERN_C_END 2395 2396 /*MC 2397 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2398 2399 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2400 and MATMPIBAIJ otherwise. 2401 2402 Options Database Keys: 2403 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2404 2405 Level: beginner 2406 2407 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2408 M*/ 2409 2410 EXTERN_C_BEGIN 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatCreate_BAIJ" 2413 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2414 { 2415 PetscErrorCode ierr; 2416 PetscMPIInt size; 2417 2418 PetscFunctionBegin; 2419 ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr); 2420 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2421 if (size == 1) { 2422 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2423 } else { 2424 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2425 } 2426 PetscFunctionReturn(0); 2427 } 2428 EXTERN_C_END 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2432 /*@C 2433 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2434 (block compressed row). For good matrix assembly performance 2435 the user should preallocate the matrix storage by setting the parameters 2436 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2437 performance can be increased by more than a factor of 50. 2438 2439 Collective on Mat 2440 2441 Input Parameters: 2442 + A - the matrix 2443 . bs - size of blockk 2444 . d_nz - number of block nonzeros per block row in diagonal portion of local 2445 submatrix (same for all local rows) 2446 . d_nnz - array containing the number of block nonzeros in the various block rows 2447 of the in diagonal portion of the local (possibly different for each block 2448 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2449 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2450 submatrix (same for all local rows). 2451 - o_nnz - array containing the number of nonzeros in the various block rows of the 2452 off-diagonal portion of the local submatrix (possibly different for 2453 each block row) or PETSC_NULL. 2454 2455 If the *_nnz parameter is given then the *_nz parameter is ignored 2456 2457 Options Database Keys: 2458 . -mat_no_unroll - uses code that does not unroll the loops in the 2459 block calculations (much slower) 2460 . -mat_block_size - size of the blocks to use 2461 2462 Notes: 2463 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2464 than it must be used on all processors that share the object for that argument. 2465 2466 Storage Information: 2467 For a square global matrix we define each processor's diagonal portion 2468 to be its local rows and the corresponding columns (a square submatrix); 2469 each processor's off-diagonal portion encompasses the remainder of the 2470 local matrix (a rectangular submatrix). 2471 2472 The user can specify preallocated storage for the diagonal part of 2473 the local submatrix with either d_nz or d_nnz (not both). Set 2474 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2475 memory allocation. Likewise, specify preallocated storage for the 2476 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2477 2478 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2479 the figure below we depict these three local rows and all columns (0-11). 2480 2481 .vb 2482 0 1 2 3 4 5 6 7 8 9 10 11 2483 ------------------- 2484 row 3 | o o o d d d o o o o o o 2485 row 4 | o o o d d d o o o o o o 2486 row 5 | o o o d d d o o o o o o 2487 ------------------- 2488 .ve 2489 2490 Thus, any entries in the d locations are stored in the d (diagonal) 2491 submatrix, and any entries in the o locations are stored in the 2492 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2493 stored simply in the MATSEQBAIJ format for compressed row storage. 2494 2495 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2496 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2497 In general, for PDE problems in which most nonzeros are near the diagonal, 2498 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2499 or you will get TERRIBLE performance; see the users' manual chapter on 2500 matrices. 2501 2502 Level: intermediate 2503 2504 .keywords: matrix, block, aij, compressed row, sparse, parallel 2505 2506 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 2507 @*/ 2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2509 { 2510 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2511 2512 PetscFunctionBegin; 2513 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2514 if (f) { 2515 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatCreateMPIBAIJ" 2522 /*@C 2523 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 2524 (block compressed row). For good matrix assembly performance 2525 the user should preallocate the matrix storage by setting the parameters 2526 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2527 performance can be increased by more than a factor of 50. 2528 2529 Collective on MPI_Comm 2530 2531 Input Parameters: 2532 + comm - MPI communicator 2533 . bs - size of blockk 2534 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2535 This value should be the same as the local size used in creating the 2536 y vector for the matrix-vector product y = Ax. 2537 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2538 This value should be the same as the local size used in creating the 2539 x vector for the matrix-vector product y = Ax. 2540 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2541 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2542 . d_nz - number of nonzero blocks per block row in diagonal portion of local 2543 submatrix (same for all local rows) 2544 . d_nnz - array containing the number of nonzero blocks in the various block rows 2545 of the in diagonal portion of the local (possibly different for each block 2546 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2547 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 2548 submatrix (same for all local rows). 2549 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 2550 off-diagonal portion of the local submatrix (possibly different for 2551 each block row) or PETSC_NULL. 2552 2553 Output Parameter: 2554 . A - the matrix 2555 2556 Options Database Keys: 2557 . -mat_no_unroll - uses code that does not unroll the loops in the 2558 block calculations (much slower) 2559 . -mat_block_size - size of the blocks to use 2560 2561 Notes: 2562 If the *_nnz parameter is given then the *_nz parameter is ignored 2563 2564 A nonzero block is any block that as 1 or more nonzeros in it 2565 2566 The user MUST specify either the local or global matrix dimensions 2567 (possibly both). 2568 2569 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2570 than it must be used on all processors that share the object for that argument. 2571 2572 Storage Information: 2573 For a square global matrix we define each processor's diagonal portion 2574 to be its local rows and the corresponding columns (a square submatrix); 2575 each processor's off-diagonal portion encompasses the remainder of the 2576 local matrix (a rectangular submatrix). 2577 2578 The user can specify preallocated storage for the diagonal part of 2579 the local submatrix with either d_nz or d_nnz (not both). Set 2580 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2581 memory allocation. Likewise, specify preallocated storage for the 2582 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2583 2584 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2585 the figure below we depict these three local rows and all columns (0-11). 2586 2587 .vb 2588 0 1 2 3 4 5 6 7 8 9 10 11 2589 ------------------- 2590 row 3 | o o o d d d o o o o o o 2591 row 4 | o o o d d d o o o o o o 2592 row 5 | o o o d d d o o o o o o 2593 ------------------- 2594 .ve 2595 2596 Thus, any entries in the d locations are stored in the d (diagonal) 2597 submatrix, and any entries in the o locations are stored in the 2598 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2599 stored simply in the MATSEQBAIJ format for compressed row storage. 2600 2601 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2602 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2603 In general, for PDE problems in which most nonzeros are near the diagonal, 2604 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2605 or you will get TERRIBLE performance; see the users' manual chapter on 2606 matrices. 2607 2608 Level: intermediate 2609 2610 .keywords: matrix, block, aij, compressed row, sparse, parallel 2611 2612 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2613 @*/ 2614 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2615 { 2616 PetscErrorCode ierr; 2617 PetscMPIInt size; 2618 2619 PetscFunctionBegin; 2620 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2621 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2622 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2623 if (size > 1) { 2624 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2625 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2626 } else { 2627 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2628 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2635 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2636 { 2637 Mat mat; 2638 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2639 PetscErrorCode ierr; 2640 PetscInt len=0; 2641 2642 PetscFunctionBegin; 2643 *newmat = 0; 2644 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 2645 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 2646 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2647 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2648 2649 mat->factor = matin->factor; 2650 mat->preallocated = PETSC_TRUE; 2651 mat->assembled = PETSC_TRUE; 2652 mat->insertmode = NOT_SET_VALUES; 2653 2654 a = (Mat_MPIBAIJ*)mat->data; 2655 mat->bs = matin->bs; 2656 a->bs2 = oldmat->bs2; 2657 a->mbs = oldmat->mbs; 2658 a->nbs = oldmat->nbs; 2659 a->Mbs = oldmat->Mbs; 2660 a->Nbs = oldmat->Nbs; 2661 2662 a->rstart = oldmat->rstart; 2663 a->rend = oldmat->rend; 2664 a->cstart = oldmat->cstart; 2665 a->cend = oldmat->cend; 2666 a->size = oldmat->size; 2667 a->rank = oldmat->rank; 2668 a->donotstash = oldmat->donotstash; 2669 a->roworiented = oldmat->roworiented; 2670 a->rowindices = 0; 2671 a->rowvalues = 0; 2672 a->getrowactive = PETSC_FALSE; 2673 a->barray = 0; 2674 a->rstart_bs = oldmat->rstart_bs; 2675 a->rend_bs = oldmat->rend_bs; 2676 a->cstart_bs = oldmat->cstart_bs; 2677 a->cend_bs = oldmat->cend_bs; 2678 2679 /* hash table stuff */ 2680 a->ht = 0; 2681 a->hd = 0; 2682 a->ht_size = 0; 2683 a->ht_flag = oldmat->ht_flag; 2684 a->ht_fact = oldmat->ht_fact; 2685 a->ht_total_ct = 0; 2686 a->ht_insert_ct = 0; 2687 2688 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2689 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2690 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2691 if (oldmat->colmap) { 2692 #if defined (PETSC_USE_CTABLE) 2693 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2694 #else 2695 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2696 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2697 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2698 #endif 2699 } else a->colmap = 0; 2700 2701 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2702 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2703 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2704 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2705 } else a->garray = 0; 2706 2707 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2708 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2709 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2710 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2711 2712 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2713 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2714 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2715 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2716 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2717 *newmat = mat; 2718 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #include "petscsys.h" 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatLoad_MPIBAIJ" 2726 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2727 { 2728 Mat A; 2729 PetscErrorCode ierr; 2730 int fd; 2731 PetscInt i,nz,j,rstart,rend; 2732 PetscScalar *vals,*buf; 2733 MPI_Comm comm = ((PetscObject)viewer)->comm; 2734 MPI_Status status; 2735 PetscMPIInt rank,size,maxnz; 2736 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2737 PetscInt *locrowlens,*procsnz = 0,*browners; 2738 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2739 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2740 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2741 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2742 2743 PetscFunctionBegin; 2744 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2745 2746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2747 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2748 if (!rank) { 2749 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2750 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2751 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2752 } 2753 2754 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2755 M = header[1]; N = header[2]; 2756 2757 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2758 2759 /* 2760 This code adds extra rows to make sure the number of rows is 2761 divisible by the blocksize 2762 */ 2763 Mbs = M/bs; 2764 extra_rows = bs - M + bs*Mbs; 2765 if (extra_rows == bs) extra_rows = 0; 2766 else Mbs++; 2767 if (extra_rows && !rank) { 2768 ierr = PetscVerboseInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2769 } 2770 2771 /* determine ownership of all rows */ 2772 mbs = Mbs/size + ((Mbs % size) > rank); 2773 m = mbs*bs; 2774 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2775 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2776 2777 /* process 0 needs enough room for process with most rows */ 2778 if (!rank) { 2779 mmax = rowners[1]; 2780 for (i=2; i<size; i++) { 2781 mmax = PetscMax(mmax,rowners[i]); 2782 } 2783 mmax*=bs; 2784 } else mmax = m; 2785 2786 rowners[0] = 0; 2787 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2788 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2789 rstart = rowners[rank]; 2790 rend = rowners[rank+1]; 2791 2792 /* distribute row lengths to all processors */ 2793 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2794 if (!rank) { 2795 mend = m; 2796 if (size == 1) mend = mend - extra_rows; 2797 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2798 for (j=mend; j<m; j++) locrowlens[j] = 1; 2799 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2800 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2801 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2802 for (j=0; j<m; j++) { 2803 procsnz[0] += locrowlens[j]; 2804 } 2805 for (i=1; i<size; i++) { 2806 mend = browners[i+1] - browners[i]; 2807 if (i == size-1) mend = mend - extra_rows; 2808 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2809 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2810 /* calculate the number of nonzeros on each processor */ 2811 for (j=0; j<browners[i+1]-browners[i]; j++) { 2812 procsnz[i] += rowlengths[j]; 2813 } 2814 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2815 } 2816 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2817 } else { 2818 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2819 } 2820 2821 if (!rank) { 2822 /* determine max buffer needed and allocate it */ 2823 maxnz = procsnz[0]; 2824 for (i=1; i<size; i++) { 2825 maxnz = PetscMax(maxnz,procsnz[i]); 2826 } 2827 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2828 2829 /* read in my part of the matrix column indices */ 2830 nz = procsnz[0]; 2831 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2832 mycols = ibuf; 2833 if (size == 1) nz -= extra_rows; 2834 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2835 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2836 2837 /* read in every ones (except the last) and ship off */ 2838 for (i=1; i<size-1; i++) { 2839 nz = procsnz[i]; 2840 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2841 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2842 } 2843 /* read in the stuff for the last proc */ 2844 if (size != 1) { 2845 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2846 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2847 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2848 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2849 } 2850 ierr = PetscFree(cols);CHKERRQ(ierr); 2851 } else { 2852 /* determine buffer space needed for message */ 2853 nz = 0; 2854 for (i=0; i<m; i++) { 2855 nz += locrowlens[i]; 2856 } 2857 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2858 mycols = ibuf; 2859 /* receive message of column indices*/ 2860 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2861 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2862 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2863 } 2864 2865 /* loop over local rows, determining number of off diagonal entries */ 2866 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2867 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2868 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2869 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2870 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2871 rowcount = 0; nzcount = 0; 2872 for (i=0; i<mbs; i++) { 2873 dcount = 0; 2874 odcount = 0; 2875 for (j=0; j<bs; j++) { 2876 kmax = locrowlens[rowcount]; 2877 for (k=0; k<kmax; k++) { 2878 tmp = mycols[nzcount++]/bs; 2879 if (!mask[tmp]) { 2880 mask[tmp] = 1; 2881 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2882 else masked1[dcount++] = tmp; 2883 } 2884 } 2885 rowcount++; 2886 } 2887 2888 dlens[i] = dcount; 2889 odlens[i] = odcount; 2890 2891 /* zero out the mask elements we set */ 2892 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2893 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2894 } 2895 2896 /* create our matrix */ 2897 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2898 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2899 ierr = MatSetType(A,type);CHKERRQ(ierr) 2900 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2901 2902 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2903 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2904 2905 if (!rank) { 2906 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2907 /* read in my part of the matrix numerical values */ 2908 nz = procsnz[0]; 2909 vals = buf; 2910 mycols = ibuf; 2911 if (size == 1) nz -= extra_rows; 2912 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2913 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2914 2915 /* insert into matrix */ 2916 jj = rstart*bs; 2917 for (i=0; i<m; i++) { 2918 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2919 mycols += locrowlens[i]; 2920 vals += locrowlens[i]; 2921 jj++; 2922 } 2923 /* read in other processors (except the last one) and ship out */ 2924 for (i=1; i<size-1; i++) { 2925 nz = procsnz[i]; 2926 vals = buf; 2927 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2928 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2929 } 2930 /* the last proc */ 2931 if (size != 1){ 2932 nz = procsnz[i] - extra_rows; 2933 vals = buf; 2934 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2935 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2936 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2937 } 2938 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2939 } else { 2940 /* receive numeric values */ 2941 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2942 2943 /* receive message of values*/ 2944 vals = buf; 2945 mycols = ibuf; 2946 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2947 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2948 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2949 2950 /* insert into matrix */ 2951 jj = rstart*bs; 2952 for (i=0; i<m; i++) { 2953 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2954 mycols += locrowlens[i]; 2955 vals += locrowlens[i]; 2956 jj++; 2957 } 2958 } 2959 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2960 ierr = PetscFree(buf);CHKERRQ(ierr); 2961 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2962 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2963 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2964 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2965 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 2968 *newmat = A; 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2974 /*@ 2975 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2976 2977 Input Parameters: 2978 . mat - the matrix 2979 . fact - factor 2980 2981 Collective on Mat 2982 2983 Level: advanced 2984 2985 Notes: 2986 This can also be set by the command line option: -mat_use_hash_table fact 2987 2988 .keywords: matrix, hashtable, factor, HT 2989 2990 .seealso: MatSetOption() 2991 @*/ 2992 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2993 { 2994 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2995 2996 PetscFunctionBegin; 2997 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2998 if (f) { 2999 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3000 } 3001 PetscFunctionReturn(0); 3002 } 3003 3004 EXTERN_C_BEGIN 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3007 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3008 { 3009 Mat_MPIBAIJ *baij; 3010 3011 PetscFunctionBegin; 3012 baij = (Mat_MPIBAIJ*)mat->data; 3013 baij->ht_fact = fact; 3014 PetscFunctionReturn(0); 3015 } 3016 EXTERN_C_END 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3020 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3021 { 3022 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3023 PetscFunctionBegin; 3024 *Ad = a->A; 3025 *Ao = a->B; 3026 *colmap = a->garray; 3027 PetscFunctionReturn(0); 3028 } 3029