#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs = mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
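/*
   Worked example of the colmap convention (illustrative commentary, not
   library code): with bs = 2 and garray = {3,7}, i.e. the off-diagonal
   part references global block columns 3 and 7, the array form of colmap
   becomes

       colmap[3] = 0*bs+1 = 1,    colmap[7] = 1*bs+1 = 3,

   and every other entry stays 0.  A lookup therefore yields the local
   starting point-column shifted by one so that 0 can mean "not present";
   callers subtract 1 after the lookup, exactly as MatSetValues_MPIBAIJ()
   below does with "col = baij->colmap[in[j]/bs] - 1".
*/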
#define CHUNKSIZE 10

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
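/*
   A note on the two macros above (added commentary): each bs x bs block is
   stored column-major, so entry (ridx,cidx) of block _i lives at
   ap[bs2*_i + bs*cidx + ridx].  The while loop is a coarse bisection that
   narrows the candidate window to at most three block columns before the
   final linear scan; rp[] holds the sorted block-column indices of the
   current block row, and a miss either reallocates (nonew == 0) or bails
   out via the noinsert label.
*/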
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
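/*
   Usage sketch (illustrative only): setting one 2x2 block entry-by-entry
   through the public API, which dispatches here for MATMPIBAIJ.  The
   indices are global point (not block) indices; with bs = 2 the rows and
   columns below all fall in block 2.

       PetscInt    rows[2] = {4,5},cols[2] = {4,5};
       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};   (row-oriented layout)
       ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Entries owned by another process land in mat->stash and travel during
   the assembly phase.
*/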
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray = baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
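/*
   Added commentary on the multiplicative (Fibonacci) hash above: HASH_KEY
   is the golden ratio conjugate, and HASH keeps the fractional part of
   key*HASH_KEY scaled to the table size.  Worked example (illustrative):
   with size = 13 and key = 7, tmp = 7*0.6180339887 = 4.3262...; the
   fractional part is 0.3262..., and (PetscInt)(13*0.3262...) = 4, so
   probing starts at slot 4.
*/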
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
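/*
   Added commentary: the table uses open addressing with linear probing,
   so a miss at h1 scans forward to the end of the table and then wraps
   around to scan [0,h1).  Arriving back at h1 means the key is absent,
   which is an error here: the _HT variants can only overwrite or add to
   nonzeros that already existed when the table was built by
   MatCreateHashTable_MPIBAIJ_Private().
*/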
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
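/*
   Added commentary: the three norms computed above are

     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 ), local partial
                      sums reduced with MPI_SUM, sqrt taken afterwards;
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|, per-column sums
                      reduced with MPI_SUM, then a local max (note the
                      column-sum workspace is of global length, so this
                      path is non-scalable);
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|, row sums are already
                      complete locally, so only an MPI_MAX is needed.
*/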
/*
   Creates the hash table, and sets the table.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size          = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
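/*
   Added commentary: ht_size = factor*nz, so "factor" is the reciprocal of
   the table's load factor; factor > 1 leaves empty slots and keeps the
   linear-probe chains short.  For example, a factor of 1.6 (an assumed
   illustrative value; the actual factor baij->ht_fact can be changed
   through the MatSetHashTableFactor_C hook composed on the matrix) trades
   roughly 60% extra slots for near-O(1) lookups in the _HT setters above.
*/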
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed
       column-oriented, set the row-oriented flags to column-oriented, and
       after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
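/*
   Usage sketch (illustrative only): the standard assembly cycle that
   drives the two routines above.  MatAssemblyBegin() launches the stash
   scatter for off-process entries; MatAssemblyEnd() drains the incoming
   messages back through MatSetValues(Blocked)_MPIBAIJ and then assembles
   the diagonal (A) and off-diagonal (B) blocks.

       ... MatSetValues()/MatSetValuesBlocked() calls on any process ...
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ... independent local work may overlap the communication here ...
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/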
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap.n,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
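/*
   Added commentary: both products use the usual parallel split

       y = A_d x_local + B x_ghost,

   where A_d (stored in a->A) holds the diagonal block and B (a->B) the
   off-process couplings.  The scatter that fills a->lvec with ghost
   values is started first, the purely local product runs while the
   messages are in flight, and B's contribution is added after
   VecScatterEnd() completes.
*/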
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
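/*
   Added commentary: a returned row is stitched together from three runs
   that are each already sorted by global column: the B entries whose
   global block column precedes the diagonal block (below cstart), then
   all A entries (the diagonal block), then the remaining B entries.
   imark records where the B portion is split between the first and third
   runs.
*/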
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1 != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1 != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
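/*
   Added commentary: MatDiagonalScale computes A <- diag(ll) A diag(rr).
   The left scaling needs only locally owned rows, so it is applied to
   both blocks right away; the right scaling of the off-diagonal block
   needs the ghost entries of rr, so the forward scatter into baij->lvec
   is started first and that block is scaled only after VecScatterEnd().
*/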
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
(i=0; i<size; i++) { 1627 if (nprocs[2*i+1]) { 1628 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = PetscFree(starts);CHKERRQ(ierr); 1632 1633 base = owners[rank]; 1634 1635 /* wait on receives */ 1636 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1637 source = lens + nrecvs; 1638 count = nrecvs; slen = 0; 1639 while (count) { 1640 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1641 /* unpack receives into our local space */ 1642 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1643 source[imdex] = recv_status.MPI_SOURCE; 1644 lens[imdex] = n; 1645 slen += n; 1646 count--; 1647 } 1648 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1649 1650 /* move the data into the send scatter */ 1651 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1652 count = 0; 1653 for (i=0; i<nrecvs; i++) { 1654 values = rvalues + i*nmax; 1655 for (j=0; j<lens[i]; j++) { 1656 lrows[count++] = values[j] - base; 1657 } 1658 } 1659 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1660 ierr = PetscFree(lens);CHKERRQ(ierr); 1661 ierr = PetscFree(owner);CHKERRQ(ierr); 1662 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1663 1664 /* actually zap the local rows */ 1665 /* 1666 Zero the required rows. If the "diagonal block" of the matrix 1667 is square and the user wishes to set the diagonal we use separate 1668 code so that MatSetValues() is not called for each diagonal allocating 1669 new memory, thus calling lots of mallocs and slowing things down. 1670 1671 Contributed by: Matthew Knepley 1672 */ 1673 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1674 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1675 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1677 } else if (diag != 0.0) { 1678 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1679 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1680 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1682 } 1683 for (i=0; i<slen; i++) { 1684 row = lrows[i] + rstart_bs; 1685 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 } else { 1690 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1691 } 1692 1693 ierr = PetscFree(lrows);CHKERRQ(ierr); 1694 1695 /* wait on sends */ 1696 if (nsends) { 1697 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1698 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1699 ierr = PetscFree(send_status);CHKERRQ(ierr); 1700 } 1701 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1702 ierr = PetscFree(svalues);CHKERRQ(ierr); 1703 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1710 { 1711 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ 
"MatEqual_MPIBAIJ" 1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1724 { 1725 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1726 Mat a,b,c,d; 1727 PetscTruth flg; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 a = matA->A; b = matA->B; 1732 c = matB->A; d = matB->B; 1733 1734 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1735 if (flg) { 1736 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1737 } 1738 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatCopy_MPIBAIJ" 1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1745 { 1746 PetscErrorCode ierr; 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1748 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1749 1750 PetscFunctionBegin; 1751 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1752 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1753 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1754 } else { 1755 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1756 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #include "petscblaslapack.h" 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1776 { 1777 PetscErrorCode ierr; 1778 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1779 PetscBLASInt bnz,one=1; 1780 Mat_SeqBAIJ *x,*y; 1781 1782 PetscFunctionBegin; 1783 if (str == SAME_NONZERO_PATTERN) { 1784 PetscScalar alpha = a; 1785 x = (Mat_SeqBAIJ *)xx->A->data; 1786 y = (Mat_SeqBAIJ *)yy->A->data; 1787 bnz = PetscBLASIntCast(x->nz); 1788 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1789 x = (Mat_SeqBAIJ *)xx->B->data; 1790 y = (Mat_SeqBAIJ *)yy->B->data; 1791 bnz = PetscBLASIntCast(x->nz); 1792 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1793 } else { 1794 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1801 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1802 { 1803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1804 PetscErrorCode ierr; 1805 1806 PetscFunctionBegin; 1807 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1808 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1814 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1815 { 1816 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1821 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 /* -------------------------------------------------------------------*/ 1826 static struct _MatOps MatOps_Values = { 1827 MatSetValues_MPIBAIJ, 1828 MatGetRow_MPIBAIJ, 1829 MatRestoreRow_MPIBAIJ, 1830 MatMult_MPIBAIJ, 1831 /* 4*/ MatMultAdd_MPIBAIJ, 1832 MatMultTranspose_MPIBAIJ, 1833 MatMultTransposeAdd_MPIBAIJ, 1834 0, 1835 0, 1836 0, 
1837 /*10*/ 0, 1838 0, 1839 0, 1840 0, 1841 MatTranspose_MPIBAIJ, 1842 /*15*/ MatGetInfo_MPIBAIJ, 1843 MatEqual_MPIBAIJ, 1844 MatGetDiagonal_MPIBAIJ, 1845 MatDiagonalScale_MPIBAIJ, 1846 MatNorm_MPIBAIJ, 1847 /*20*/ MatAssemblyBegin_MPIBAIJ, 1848 MatAssemblyEnd_MPIBAIJ, 1849 0, 1850 MatSetOption_MPIBAIJ, 1851 MatZeroEntries_MPIBAIJ, 1852 /*25*/ MatZeroRows_MPIBAIJ, 1853 0, 1854 0, 1855 0, 1856 0, 1857 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1858 0, 1859 0, 1860 0, 1861 0, 1862 /*35*/ MatDuplicate_MPIBAIJ, 1863 0, 1864 0, 1865 0, 1866 0, 1867 /*40*/ MatAXPY_MPIBAIJ, 1868 MatGetSubMatrices_MPIBAIJ, 1869 MatIncreaseOverlap_MPIBAIJ, 1870 MatGetValues_MPIBAIJ, 1871 MatCopy_MPIBAIJ, 1872 /*45*/ 0, 1873 MatScale_MPIBAIJ, 1874 0, 1875 0, 1876 0, 1877 /*50*/ 0, 1878 0, 1879 0, 1880 0, 1881 0, 1882 /*55*/ 0, 1883 0, 1884 MatSetUnfactored_MPIBAIJ, 1885 0, 1886 MatSetValuesBlocked_MPIBAIJ, 1887 /*60*/ 0, 1888 MatDestroy_MPIBAIJ, 1889 MatView_MPIBAIJ, 1890 0, 1891 0, 1892 /*65*/ 0, 1893 0, 1894 0, 1895 0, 1896 0, 1897 /*70*/ MatGetRowMaxAbs_MPIBAIJ, 1898 0, 1899 0, 1900 0, 1901 0, 1902 /*75*/ 0, 1903 0, 1904 0, 1905 0, 1906 0, 1907 /*80*/ 0, 1908 0, 1909 0, 1910 0, 1911 MatLoad_MPIBAIJ, 1912 /*85*/ 0, 1913 0, 1914 0, 1915 0, 1916 0, 1917 /*90*/ 0, 1918 0, 1919 0, 1920 0, 1921 0, 1922 /*95*/ 0, 1923 0, 1924 0, 1925 0, 1926 0, 1927 /*100*/0, 1928 0, 1929 0, 1930 0, 1931 0, 1932 /*105*/0, 1933 MatRealPart_MPIBAIJ, 1934 MatImaginaryPart_MPIBAIJ}; 1935 1936 1937 EXTERN_C_BEGIN 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 1940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 1941 { 1942 PetscFunctionBegin; 1943 *a = ((Mat_MPIBAIJ *)A->data)->A; 1944 *iscopy = PETSC_FALSE; 1945 PetscFunctionReturn(0); 1946 } 1947 EXTERN_C_END 1948 1949 EXTERN_C_BEGIN 1950 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 1951 EXTERN_C_END 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 1955 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 1956 { 1957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data; 1958 PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d; 1959 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii; 1960 const PetscInt *JJ; 1961 PetscScalar *values; 1962 PetscErrorCode ierr; 1963 1964 PetscFunctionBegin; 1965 #if defined(PETSC_OPT_g) 1966 if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]); 1967 #endif 1968 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 1969 o_nnz = d_nnz + m; 1970 1971 for (i=0; i<m; i++) { 1972 nnz = Ii[i+1]- Ii[i]; 1973 JJ = J + Ii[i]; 1974 nnz_max = PetscMax(nnz_max,nnz); 1975 #if defined(PETSC_OPT_g) 1976 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 1977 #endif 1978 for (j=0; j<nnz; j++) { 1979 if (*JJ >= cstart) break; 1980 JJ++; 1981 } 1982 d = 0; 1983 for (; j<nnz; j++) { 1984 if (*JJ++ >= cend) break; 1985 d++; 1986 } 1987 d_nnz[i] = d; 1988 o_nnz[i] = nnz - d; 1989 } 1990 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1991 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 1992 1993 if (v) values = (PetscScalar*)v; 1994 else { 1995 ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1996 ierr = 
PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1997 }
1998
1999 for (i=0; i<m; i++) {
2000 ii = i + rstart;
2001 nnz = Ii[i+1]- Ii[i];
2002 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* each block entry holds bs*bs scalars, so the offset into v is bs*bs*Ii[i] */
2003 }
2004 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006
2007 if (!v) {
2008 ierr = PetscFree(values);CHKERRQ(ierr);
2009 }
2010 PetscFunctionReturn(0);
2011 }
2012
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2015 /*@C
2016 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2017 (block compressed row).
2018
2019 Collective on MPI_Comm
2020
2021 Input Parameters:
2022 + B - the matrix
. bs - the block size
2023 . i - the indices into j for the start of each local row (starts with zero)
2024 . j - the column indices for each local row (starts with zero); these must be sorted for each row
2025 - v - optional values in the matrix
2026
2027 Level: developer
2028
2029 .keywords: matrix, aij, compressed row, sparse, parallel
2030
2031 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2032 @*/
2033 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2034 {
2035 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2036
2037 PetscFunctionBegin;
2038 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2039 if (f) {
2040 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2041 }
2042 PetscFunctionReturn(0);
2043 }
2044
2045 EXTERN_C_BEGIN
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2048 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2049 {
2050 Mat_MPIBAIJ *b;
2051 PetscErrorCode ierr;
2052 PetscInt i;
2053
2054 PetscFunctionBegin;
2055 B->preallocated = PETSC_TRUE;
2056 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2057 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2058 ierr = PetscOptionsEnd();CHKERRQ(ierr);
2059
2060 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2061 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2062 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2063 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2064 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2065
2066 B->rmap.bs = bs;
2067 B->cmap.bs = bs;
2068 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2069 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2070
2071 if (d_nnz) {
2072 for (i=0; i<B->rmap.n/bs; i++) {
2073 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2074 }
2075 }
2076 if (o_nnz) {
2077 for (i=0; i<B->rmap.n/bs; i++) {
2078 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2079 }
2080 }
2081
2082 b = (Mat_MPIBAIJ*)B->data;
2083 b->bs2 = bs*bs;
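/*
   Cache the block-scaled sizes computed below: mbs/nbs are the local numbers of
   block rows and block columns, Mbs/Nbs the global ones, rangebs the block-row
   ownership ranges, and rstartbs/rendbs/cstartbs/cendbs this process's
   block-row and block-column limits.
*/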
2084 b->mbs = B->rmap.n/bs; 2085 b->nbs = B->cmap.n/bs; 2086 b->Mbs = B->rmap.N/bs; 2087 b->Nbs = B->cmap.N/bs; 2088 2089 for (i=0; i<=b->size; i++) { 2090 b->rangebs[i] = B->rmap.range[i]/bs; 2091 } 2092 b->rstartbs = B->rmap.rstart/bs; 2093 b->rendbs = B->rmap.rend/bs; 2094 b->cstartbs = B->cmap.rstart/bs; 2095 b->cendbs = B->cmap.rend/bs; 2096 2097 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2098 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 2099 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2100 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2101 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2102 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2103 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 2104 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2105 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2106 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2107 2108 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2109 2110 PetscFunctionReturn(0); 2111 } 2112 EXTERN_C_END 2113 2114 EXTERN_C_BEGIN 2115 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2116 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2117 EXTERN_C_END 2118 2119 /*MC 2120 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2121 2122 Options Database Keys: 2123 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2124 . -mat_block_size <bs> - set the blocksize used to store the matrix 2125 - -mat_use_hash_table <fact> 2126 2127 Level: beginner 2128 2129 .seealso: MatCreateMPIBAIJ 2130 M*/ 2131 2132 EXTERN_C_BEGIN 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "MatCreate_MPIBAIJ" 2135 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2136 { 2137 Mat_MPIBAIJ *b; 2138 PetscErrorCode ierr; 2139 PetscTruth flg; 2140 2141 PetscFunctionBegin; 2142 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2143 B->data = (void*)b; 2144 2145 2146 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2147 B->mapping = 0; 2148 B->factor = 0; 2149 B->assembled = PETSC_FALSE; 2150 2151 B->insertmode = NOT_SET_VALUES; 2152 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2153 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2154 2155 /* build local table of row and column ownerships */ 2156 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2157 2158 /* build cache for off array entries formed */ 2159 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2160 b->donotstash = PETSC_FALSE; 2161 b->colmap = PETSC_NULL; 2162 b->garray = PETSC_NULL; 2163 b->roworiented = PETSC_TRUE; 2164 2165 /* stuff used in block assembly */ 2166 b->barray = 0; 2167 2168 /* stuff used for matrix vector multiply */ 2169 b->lvec = 0; 2170 b->Mvctx = 0; 2171 2172 /* stuff for MatGetRow() */ 2173 b->rowindices = 0; 2174 b->rowvalues = 0; 2175 b->getrowactive = PETSC_FALSE; 2176 2177 /* hash table stuff */ 2178 b->ht = 0; 2179 b->hd = 0; 2180 b->ht_size = 0; 2181 b->ht_flag = PETSC_FALSE; 2182 b->ht_fact = 0; 2183 b->ht_total_ct = 0; 2184 b->ht_insert_ct = 0; 2185 2186 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2187 ierr = 
PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2188 if (flg) { 2189 PetscReal fact = 1.39; 2190 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2191 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2192 if (fact <= 1.0) fact = 1.39; 2193 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2194 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2195 } 2196 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2197 2198 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2199 "MatStoreValues_MPIBAIJ", 2200 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2201 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2202 "MatRetrieveValues_MPIBAIJ", 2203 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2204 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2205 "MatGetDiagonalBlock_MPIBAIJ", 2206 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2207 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2208 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2209 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2210 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2211 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 2212 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2213 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2214 "MatDiagonalScaleLocal_MPIBAIJ", 2215 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2216 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 2217 "MatSetHashTableFactor_MPIBAIJ", 2218 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2219 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 EXTERN_C_END 2223 2224 /*MC 2225 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2226 2227 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2228 and MATMPIBAIJ otherwise. 2229 2230 Options Database Keys: 2231 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2232 2233 Level: beginner 2234 2235 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2236 M*/ 2237 2238 EXTERN_C_BEGIN 2239 #undef __FUNCT__ 2240 #define __FUNCT__ "MatCreate_BAIJ" 2241 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 2242 { 2243 PetscErrorCode ierr; 2244 PetscMPIInt size; 2245 2246 PetscFunctionBegin; 2247 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2248 if (size == 1) { 2249 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 EXTERN_C_END 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 2259 /*@C 2260 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2261 (block compressed row). For good matrix assembly performance 2262 the user should preallocate the matrix storage by setting the parameters 2263 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2264 performance can be increased by more than a factor of 50. 
2265
2266 Collective on Mat
2267
2268 Input Parameters:
2269 + B - the matrix
2270 . bs - size of block
2271 . d_nz - number of block nonzeros per block row in diagonal portion of local
2272 submatrix (same for all local rows)
2273 . d_nnz - array containing the number of block nonzeros in the various block rows
2274 in the diagonal portion of the local submatrix (possibly different for each block
2275 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2276 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2277 submatrix (same for all local rows).
2278 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2279 off-diagonal portion of the local submatrix (possibly different for
2280 each block row) or PETSC_NULL.
2281
2282 If the *_nnz parameter is given then the *_nz parameter is ignored.
2283
2284 Options Database Keys:
2285 + -mat_block_size - size of the blocks to use
2286 - -mat_use_hash_table <fact> - set hash table factor
2287
2288 Notes:
2289 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2290 then it must be used on all processors that share the object for that argument.
2291
2292 Storage Information:
2293 For a square global matrix we define each processor's diagonal portion
2294 to be its local rows and the corresponding columns (a square submatrix);
2295 each processor's off-diagonal portion encompasses the remainder of the
2296 local matrix (a rectangular submatrix).
2297
2298 The user can specify preallocated storage for the diagonal part of
2299 the local submatrix with either d_nz or d_nnz (not both). Set
2300 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2301 memory allocation. Likewise, specify preallocated storage for the
2302 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2303
2304 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2305 the figure below we depict these three local rows and all columns (0-11).
2306
2307 .vb
2308 0 1 2 3 4 5 6 7 8 9 10 11
2309 -------------------
2310 row 3 | o o o d d d o o o o o o
2311 row 4 | o o o d d d o o o o o o
2312 row 5 | o o o d d d o o o o o o
2313 -------------------
2314 .ve
2315
2316 Thus, any entries in the d locations are stored in the d (diagonal)
2317 submatrix, and any entries in the o locations are stored in the
2318 o (off-diagonal) submatrix. Note that the d and the o submatrices are
2319 stored simply in the MATSEQBAIJ format for compressed row storage.
2320
2321 Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2322 and o_nz should indicate the number of block nonzeros per row in the o matrix.
2323 In general, for PDE problems in which most nonzeros are near the diagonal,
2324 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2325 or you will get TERRIBLE performance; see the users' manual chapter on
2326 matrices.
2327
2328 You can call MatGetInfo() to get information on how effective the preallocation was;
2329 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2330 You can also run with the option -info and look for messages with the string
2331 malloc in them to see if additional memory allocation was needed.
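   A sketch of typical usage (the sizes and nonzero estimates below are
   illustrative assumptions, not taken from this source): create a matrix with
   2x2 blocks and 8 local rows and columns, guessing 5 block nonzeros per block
   row in the diagonal part and 2 in the off-diagonal part.

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,8,8,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve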
2332
2333 Level: intermediate
2334
2335 .keywords: matrix, block, aij, compressed row, sparse, parallel
2336
2337 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2338 @*/
2339 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2340 {
2341 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2342
2343 PetscFunctionBegin;
2344 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2345 if (f) {
2346 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2347 }
2348 PetscFunctionReturn(0);
2349 }
2350
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatCreateMPIBAIJ"
2353 /*@C
2354 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2355 (block compressed row). For good matrix assembly performance
2356 the user should preallocate the matrix storage by setting the parameters
2357 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2358 performance can be increased by more than a factor of 50.
2359
2360 Collective on MPI_Comm
2361
2362 Input Parameters:
2363 + comm - MPI communicator
2364 . bs - size of block
2365 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2366 This value should be the same as the local size used in creating the
2367 y vector for the matrix-vector product y = Ax.
2368 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2369 This value should be the same as the local size used in creating the
2370 x vector for the matrix-vector product y = Ax.
2371 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2372 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2373 . d_nz - number of nonzero blocks per block row in diagonal portion of local
2374 submatrix (same for all local rows)
2375 . d_nnz - array containing the number of nonzero blocks in the various block rows
2376 in the diagonal portion of the local submatrix (possibly different for each block
2377 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2378 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2379 submatrix (same for all local rows).
2380 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2381 off-diagonal portion of the local submatrix (possibly different for
2382 each block row) or PETSC_NULL.
2383
2384 Output Parameter:
2385 . A - the matrix
2386
2387 Options Database Keys:
2388 + -mat_block_size - size of the blocks to use
2389 - -mat_use_hash_table <fact> - set hash table factor
2390
2391 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2392 MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
2393 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2394 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2395
2396 Notes:
2397 If the *_nnz parameter is given then the *_nz parameter is ignored.
2398
2399 A nonzero block is any block that has 1 or more nonzeros in it
2400
2401 The user MUST specify either the local or global matrix dimensions
2402 (possibly both).
2403
2404 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2405 then it must be used on all processors that share the object for that argument.
2406
2407 Storage Information:
2408 For a square global matrix we define each processor's diagonal portion
2409 to be its local rows and the corresponding columns (a square submatrix);
2410 each processor's off-diagonal portion encompasses the remainder of the
2411 local matrix (a rectangular submatrix).
2412
2413 The user can specify preallocated storage for the diagonal part of
2414 the local submatrix with either d_nz or d_nnz (not both). Set
2415 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2416 memory allocation. Likewise, specify preallocated storage for the
2417 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2418
2419 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2420 the figure below we depict these three local rows and all columns (0-11).
2421
2422 .vb
2423 0 1 2 3 4 5 6 7 8 9 10 11
2424 -------------------
2425 row 3 | o o o d d d o o o o o o
2426 row 4 | o o o d d d o o o o o o
2427 row 5 | o o o d d d o o o o o o
2428 -------------------
2429 .ve
2430
2431 Thus, any entries in the d locations are stored in the d (diagonal)
2432 submatrix, and any entries in the o locations are stored in the
2433 o (off-diagonal) submatrix. Note that the d and the o submatrices are
2434 stored simply in the MATSEQBAIJ format for compressed row storage.
2435
2436 Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2437 and o_nz should indicate the number of block nonzeros per row in the o matrix.
2438 In general, for PDE problems in which most nonzeros are near the diagonal,
2439 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2440 or you will get TERRIBLE performance; see the users' manual chapter on
2441 matrices.
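   For example, under the same illustrative assumptions used above for
   MatMPIBAIJSetPreallocation() (2x2 blocks, 8 local rows and columns, and
   comm and ierr declared by the caller), the equivalent one-call creation
   would be sketched as:

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,8,8,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve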
2442 2443 Level: intermediate 2444 2445 .keywords: matrix, block, aij, compressed row, sparse, parallel 2446 2447 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2448 @*/ 2449 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2450 { 2451 PetscErrorCode ierr; 2452 PetscMPIInt size; 2453 2454 PetscFunctionBegin; 2455 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2456 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2457 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2458 if (size > 1) { 2459 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2460 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2461 } else { 2462 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2463 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2470 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2471 { 2472 Mat mat; 2473 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2474 PetscErrorCode ierr; 2475 PetscInt len=0; 2476 2477 PetscFunctionBegin; 2478 *newmat = 0; 2479 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2480 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 2481 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2482 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2483 2484 mat->factor = matin->factor; 2485 mat->preallocated = PETSC_TRUE; 2486 mat->assembled = PETSC_TRUE; 2487 mat->insertmode = NOT_SET_VALUES; 2488 2489 a = (Mat_MPIBAIJ*)mat->data; 2490 mat->rmap.bs = matin->rmap.bs; 2491 a->bs2 = oldmat->bs2; 2492 a->mbs = oldmat->mbs; 2493 a->nbs = oldmat->nbs; 2494 a->Mbs = oldmat->Mbs; 2495 a->Nbs = oldmat->Nbs; 2496 2497 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 2498 ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 2499 2500 a->size = oldmat->size; 2501 a->rank = oldmat->rank; 2502 a->donotstash = oldmat->donotstash; 2503 a->roworiented = oldmat->roworiented; 2504 a->rowindices = 0; 2505 a->rowvalues = 0; 2506 a->getrowactive = PETSC_FALSE; 2507 a->barray = 0; 2508 a->rstartbs = oldmat->rstartbs; 2509 a->rendbs = oldmat->rendbs; 2510 a->cstartbs = oldmat->cstartbs; 2511 a->cendbs = oldmat->cendbs; 2512 2513 /* hash table stuff */ 2514 a->ht = 0; 2515 a->hd = 0; 2516 a->ht_size = 0; 2517 a->ht_flag = oldmat->ht_flag; 2518 a->ht_fact = oldmat->ht_fact; 2519 a->ht_total_ct = 0; 2520 a->ht_insert_ct = 0; 2521 2522 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 2523 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr); 2524 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr); 2525 if (oldmat->colmap) { 2526 #if defined (PETSC_USE_CTABLE) 2527 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2528 #else 2529 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2530 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2531 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2532 #endif 2533 } else a->colmap = 0; 2534 2535 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2536 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2537 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2538 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2539 } else a->garray = 0; 2540 2541 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2542 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2543 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2544 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2545 2546 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2547 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2548 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2549 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2550 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2551 *newmat = mat; 2552 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #include "petscsys.h" 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "MatLoad_MPIBAIJ" 2560 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat) 2561 { 2562 Mat A; 2563 PetscErrorCode ierr; 2564 int fd; 2565 PetscInt i,nz,j,rstart,rend; 2566 PetscScalar *vals,*buf; 2567 MPI_Comm comm = ((PetscObject)viewer)->comm; 2568 MPI_Status status; 2569 PetscMPIInt rank,size,maxnz; 2570 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2571 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 2572 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2573 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2574 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 2575 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2576 2577 PetscFunctionBegin; 2578 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 2579 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2580 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2581 2582 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2583 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2584 if (!rank) { 2585 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2586 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2587 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2588 } 2589 2590 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2591 M = header[1]; N = header[2]; 2592 2593 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2594 2595 /* 2596 This code adds extra rows to make sure the number of rows is 2597 divisible by the blocksize 2598 */ 2599 Mbs = M/bs; 2600 extra_rows = bs - M + bs*Mbs; 2601 if (extra_rows == bs) extra_rows = 0; 2602 else Mbs++; 2603 if (extra_rows && !rank) { 2604 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2605 } 2606 2607 /* determine ownership of all rows */ 2608 mbs = Mbs/size + ((Mbs % size) > rank); 2609 m = mbs*bs; 2610 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2611 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2612 2613 /* process 0 needs enough room for 
the process with the most rows */
2614 if (!rank) {
2615 mmax = rowners[1];
2616 for (i=2; i<size; i++) {
2617 mmax = PetscMax(mmax,rowners[i]);
2618 }
2619 mmax*=bs;
2620 } else mmax = m;
2621
2622 rowners[0] = 0;
2623 for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2624 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2625 rstart = rowners[rank];
2626 rend = rowners[rank+1];
2627
2628 /* distribute row lengths to all processors */
2629 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2630 if (!rank) {
2631 mend = m;
2632 if (size == 1) mend = mend - extra_rows;
2633 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2634 for (j=mend; j<m; j++) locrowlens[j] = 1;
2635 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2636 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2637 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2638 for (j=0; j<m; j++) {
2639 procsnz[0] += locrowlens[j];
2640 }
2641 for (i=1; i<size; i++) {
2642 mend = browners[i+1] - browners[i];
2643 if (i == size-1) mend = mend - extra_rows;
2644 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2645 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2646 /* calculate the number of nonzeros on each processor */
2647 for (j=0; j<browners[i+1]-browners[i]; j++) {
2648 procsnz[i] += rowlengths[j];
2649 }
2650 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2651 }
2652 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2653 } else {
2654 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2655 }
2656
2657 if (!rank) {
2658 /* determine max buffer needed and allocate it */
2659 maxnz = procsnz[0];
2660 for (i=1; i<size; i++) {
2661 maxnz = PetscMax(maxnz,procsnz[i]);
2662 }
2663 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2664
2665 /* read in my part of the matrix column indices */
2666 nz = procsnz[0];
2667 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2668 mycols = ibuf;
2669 if (size == 1) nz -= extra_rows;
2670 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2671 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2672
2673 /* read in the other processors' parts (except the last) and ship them off */
2674 for (i=1; i<size-1; i++) {
2675 nz = procsnz[i];
2676 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2677 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2678 }
2679 /* read in the part for the last proc */
2680 if (size != 1) {
2681 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2682 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2683 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2684 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2685 }
2686 ierr = PetscFree(cols);CHKERRQ(ierr);
2687 } else {
2688 /* determine buffer space needed for message */
2689 nz = 0;
2690 for (i=0; i<m; i++) {
2691 nz += locrowlens[i];
2692 }
2693 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2694 mycols = ibuf;
2695 /* receive message of column indices */
2696 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2697 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2698 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2699 }
2700
2701 /* loop over local rows, determining number of off diagonal entries */
2702 ierr =
PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2703 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2704 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2705 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2706 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2707 rowcount = 0; nzcount = 0;
2708 for (i=0; i<mbs; i++) {
2709 dcount = 0;
2710 odcount = 0;
2711 for (j=0; j<bs; j++) {
2712 kmax = locrowlens[rowcount];
2713 for (k=0; k<kmax; k++) {
2714 tmp = mycols[nzcount++]/bs;
2715 if (!mask[tmp]) {
2716 mask[tmp] = 1;
2717 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2718 else masked1[dcount++] = tmp;
2719 }
2720 }
2721 rowcount++;
2722 }
2723
2724 dlens[i] = dcount;
2725 odlens[i] = odcount;
2726
2727 /* zero out the mask elements we set */
2728 for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2729 for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2730 }
2731
2732 /* create our matrix */
2733 ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2734 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2735 ierr = MatSetType(A,type);CHKERRQ(ierr);
2736 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2737
2738 if (!rank) {
2739 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2740 /* read in my part of the matrix numerical values */
2741 nz = procsnz[0];
2742 vals = buf;
2743 mycols = ibuf;
2744 if (size == 1) nz -= extra_rows;
2745 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2746 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2747
2748 /* insert into matrix */
2749 jj = rstart*bs;
2750 for (i=0; i<m; i++) {
2751 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2752 mycols += locrowlens[i];
2753 vals += locrowlens[i];
2754 jj++;
2755 }
2756 /* read in other processors (except the last one) and ship out */
2757 for (i=1; i<size-1; i++) {
2758 nz = procsnz[i];
2759 vals = buf;
2760 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2761 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2762 }
2763 /* the last proc */
2764 if (size != 1){
2765 nz = procsnz[size-1] - extra_rows;
2766 vals = buf;
2767 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2768 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2769 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2770 }
2771 ierr = PetscFree(procsnz);CHKERRQ(ierr);
2772 } else {
2773 /* receive numeric values */
2774 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2775
2776 /* receive message of values */
2777 vals = buf;
2778 mycols = ibuf;
2779 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2780 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2781 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2782
2783 /* insert into matrix */
2784 jj = rstart*bs;
2785 for (i=0; i<m; i++) {
2786 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2787 mycols += locrowlens[i];
2788 vals += locrowlens[i];
2789 jj++;
2790 }
2791 }
2792 ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2793 ierr = PetscFree(buf);CHKERRQ(ierr);
2794 ierr = PetscFree(ibuf);CHKERRQ(ierr);
2795 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2796 ierr =
PetscFree2(dlens,odlens);CHKERRQ(ierr);
2797 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2798 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2800
2801 *newmat = A;
2802 PetscFunctionReturn(0);
2803 }
2804
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2807 /*@
2808 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2809
2810 Input Parameters:
2811 + mat - the matrix
2812 - fact - factor
2813
2814 Collective on Mat
2815
2816 Level: advanced
2817
2818 Notes:
2819 This can also be set by the command line option: -mat_use_hash_table <fact>
2820
2821 .keywords: matrix, hashtable, factor, HT
2822
2823 .seealso: MatSetOption()
2824 @*/
2825 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2826 {
2827 PetscErrorCode ierr,(*f)(Mat,PetscReal);
2828
2829 PetscFunctionBegin;
2830 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2831 if (f) {
2832 ierr = (*f)(mat,fact);CHKERRQ(ierr);
2833 }
2834 PetscFunctionReturn(0);
2835 }
2836
2837 EXTERN_C_BEGIN
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2841 {
2842 Mat_MPIBAIJ *baij;
2843
2844 PetscFunctionBegin;
2845 baij = (Mat_MPIBAIJ*)mat->data;
2846 baij->ht_fact = fact;
2847 PetscFunctionReturn(0);
2848 }
2849 EXTERN_C_END
2850
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2854 {
2855 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2856 PetscFunctionBegin;
2857 *Ad = a->A;
2858 *Ao = a->B;
2859 *colmap = a->garray;
2860 PetscFunctionReturn(0);
2861 }
2862
2863 /*
2864 Special version for direct calls from Fortran (to eliminate two function call overheads)
2865 */
2866 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2867 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2868 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2869 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2870 #endif
2871
2872 #undef __FUNCT__
2873 #define __FUNCT__ "matmpibaijsetvaluesblocked"
2874 /*@C
2875 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()
2876
2877 Collective on Mat
2878
2879 Input Parameters:
2880 + mat - the matrix
2881 . min - number of input rows
2882 . im - input rows
2883 . nin - number of input columns
2884 . in - input columns
2885 . v - numerical values input
2886 - addvin - INSERT_VALUES or ADD_VALUES
2887
2888 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
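   In effect this routine performs, in a single call, what the C fragment
   below does (a sketch of the correspondence only; the pointer dereferences
   reflect the Fortran pass-by-reference convention of the prototype that
   follows):

.vb
   ierr = MatSetValuesBlocked(*matin,*min,im,*nin,in,v,*addvin);CHKERRQ(ierr);
.ve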
2889 2890 Level: advanced 2891 2892 .seealso: MatSetValuesBlocked() 2893 @*/ 2894 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 2895 { 2896 /* convert input arguments to C version */ 2897 Mat mat = *matin; 2898 PetscInt m = *min, n = *nin; 2899 InsertMode addv = *addvin; 2900 2901 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2902 const MatScalar *value; 2903 MatScalar *barray=baij->barray; 2904 PetscTruth roworiented = baij->roworiented; 2905 PetscErrorCode ierr; 2906 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 2907 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 2908 PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2; 2909 2910 PetscFunctionBegin; 2911 /* tasks normally handled by MatSetValuesBlocked() */ 2912 if (mat->insertmode == NOT_SET_VALUES) { 2913 mat->insertmode = addv; 2914 } 2915 #if defined(PETSC_USE_DEBUG) 2916 else if (mat->insertmode != addv) { 2917 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 2918 } 2919 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2920 #endif 2921 if (mat->assembled) { 2922 mat->was_assembled = PETSC_TRUE; 2923 mat->assembled = PETSC_FALSE; 2924 } 2925 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 2926 2927 2928 if(!barray) { 2929 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 2930 baij->barray = barray; 2931 } 2932 2933 if (roworiented) { 2934 stepval = (n-1)*bs; 2935 } else { 2936 stepval = (m-1)*bs; 2937 } 2938 for (i=0; i<m; i++) { 2939 if (im[i] < 0) continue; 2940 #if defined(PETSC_USE_DEBUG) 2941 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 2942 #endif 2943 if (im[i] >= rstart && im[i] < rend) { 2944 row = im[i] - rstart; 2945 for (j=0; j<n; j++) { 2946 /* If NumCol = 1 then a copy is not required */ 2947 if ((roworiented) && (n == 1)) { 2948 barray = (MatScalar*)v + i*bs2; 2949 } else if((!roworiented) && (m == 1)) { 2950 barray = (MatScalar*)v + j*bs2; 2951 } else { /* Here a copy is required */ 2952 if (roworiented) { 2953 value = v + i*(stepval+bs)*bs + j*bs; 2954 } else { 2955 value = v + j*(stepval+bs)*bs + i*bs; 2956 } 2957 for (ii=0; ii<bs; ii++,value+=stepval) { 2958 for (jj=0; jj<bs; jj++) { 2959 *barray++ = *value++; 2960 } 2961 } 2962 barray -=bs2; 2963 } 2964 2965 if (in[j] >= cstart && in[j] < cend){ 2966 col = in[j] - cstart; 2967 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 2968 } 2969 else if (in[j] < 0) continue; 2970 #if defined(PETSC_USE_DEBUG) 2971 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 2972 #endif 2973 else { 2974 if (mat->was_assembled) { 2975 if (!baij->colmap) { 2976 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2977 } 2978 2979 #if defined(PETSC_USE_DEBUG) 2980 #if defined (PETSC_USE_CTABLE) 2981 { PetscInt data; 2982 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 2983 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2984 } 2985 #else 2986 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 2987 #endif 2988 #endif 2989 #if defined (PETSC_USE_CTABLE) 2990 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 2991 col = (col - 1)/bs; 2992 #else 2993 col = (baij->colmap[in[j]] - 1)/bs; 2994 #endif 2995 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 2996 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 2997 col = in[j]; 2998 } 2999 } 3000 else col = in[j]; 3001 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3002 } 3003 } 3004 } else { 3005 if (!baij->donotstash) { 3006 if (roworiented) { 3007 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3008 } else { 3009 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3010 } 3011 } 3012 } 3013 } 3014 3015 /* task normally handled by MatSetValuesBlocked() */ 3016 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3017 PetscFunctionReturn(0); 3018 } 3019