#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
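/*
   How colmap is read back elsewhere in this file (a sketch of the existing
   convention, not additional API): for a global point column in[j], colmap
   stores 1 + the first local point column of that block in B, and 0 (or a
   failed table lookup) means the block is not present yet:

     col = baij->colmap[in[j]/bs] - 1;      non-CTABLE case
     if (col < 0)  ... block absent: disassemble or error ...
     else          col += in[j]%bs;         recover the point column in B
*/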
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
  rmax = aimax[brow]; nrow = ailen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto a_noinsert; \
    } \
  } \
  if (a->nonew == 1) goto a_noinsert; \
  if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
  ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
  brow = row/bs;  \
  rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
  rmax = bimax[brow]; nrow = bilen[brow]; \
  bcol = col/bs; \
  ridx = row % bs; cidx = col % bs; \
  low  = 0; high = nrow; \
  while (high-low > 3) { \
    t = (low+high)/2; \
    if (rp[t] > bcol) high = t; \
    else              low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > bcol) break; \
    if (rp[_i] == bcol) { \
      bap = ap + bs2*_i + bs*cidx + ridx; \
      if (addv == ADD_VALUES) *bap += value; \
      else                    *bap  = value; \
      goto b_noinsert; \
    } \
  } \
  if (b->nonew == 1) goto b_noinsert; \
  if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
  CHKMEMQ; \
  N = nrow++ - 1; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp[ii+1] = rp[ii]; \
    ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
  } \
  if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
  rp[_i]                      = bcol; \
  ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
  bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
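/*
   Minimal usage sketch (assumes mat is a previously created and
   preallocated MATMPIBAIJ matrix; indices and value are illustrative):

     PetscInt    row = 0,col = 0;
     PetscScalar one = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Locally owned entries go straight into the A (diagonal) or B
   (off-diagonal) block via the macros above; rows owned by other processes
   are stashed and exchanged during assembly.
*/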
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray = baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
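/*
   Copy logic above, sketched for roworiented with bs = 2 and m = n = 2
   (numbers illustrative): v holds an m x n grid of bs x bs blocks stored
   row by row, so block (i,j) begins at v + (i*(stepval+bs) + j)*bs and
   consecutive rows of one block are stepval+bs = n*bs scalars apart; the
   loop gathers the block contiguously into barray, which
   MatSetValuesBlocked_SeqBAIJ() then interprets according to the
   row-oriented flag.
*/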
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
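/*
   The HASH() macro above is multiplicative hashing with the golden-ratio
   constant: the 1-based block key (row/bs)*Nbs + (col/bs) + 1 is multiplied
   by 0.6180339887 and the fractional part is scaled to the table size.
   Worked example (numbers illustrative): size = 100, key = 7 gives
   tmp = 4.3262..., fractional part 0.3262..., hence slot h1 = 32.
*/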
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
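/*
   The _HT kernels above are not installed by default:
   MatAssemblyEnd_MPIBAIJ() below builds the table and switches
   mat->ops->setvalues and mat->ops->setvaluesblocked to them only after
   the user has requested hashing.  Usage sketch (illustrative):

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   After the switch, values may be reinserted only at existing nonzero
   locations, since a missing key is an error in these routines.
*/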
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
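/*
   Usage sketch (collective; every process must pass the same type):

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

   The Frobenius and 1-norms sum local contributions and MPI_Allreduce()
   them; the infinity norm reduces the per-process row-sum maxima with
   MPI_MAX.
*/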
/*
   Creates and fills the hash table. This table is created only once;
   if new entries need to be added to the matrix then the hash table
   has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
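/*
   Collision handling above is plain linear probing: starting at the hashed
   slot h1, the first empty slot (HT[(h1+k)%ht_size] == 0) receives the key,
   and HD[] records a pointer straight into the SeqBAIJ value array, so the
   _HT set-values kernels can write with no index translation at all.  The
   factor argument oversizes the table relative to the nonzero count to
   keep the probe chains short.
*/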
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed
       column-oriented, set the row-oriented flags to PETSC_FALSE, and
       restore the original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
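/*
   Typical sequence driving the two assembly routines above (a sketch; the
   indices and value are illustrative):

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Begin() launches the stash communication; End() drains both stashes
   through MatSetValues(Blocked)_MPIBAIJ(), grouping consecutive values of
   the same row, and then assembles the A and B blocks.
*/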
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
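/*
   Usage sketch (collective; viewer and format are illustrative):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The INFO formats print summaries only; the default ASCII path gathers
   the whole matrix onto process 0 (as done above), so it is unsuitable
   for large problems.
*/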
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
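/*
   Both routines above overlap communication with computation: the scatter
   of the needed off-process entries of xx into a->lvec is started, the
   diagonal-block product A*xx is computed while the messages are in
   flight, and the off-diagonal contribution B*lvec is added once the
   scatter has completed.  Usage sketch (vectors must conform to the
   matrix layout):

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/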
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
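/*
   Usage sketch (only locally owned rows may be queried, and only one row
   may be active at a time, as the getrowactive guard above enforces;
   row is illustrative):

     PetscInt          nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/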
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
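/*
   Notes on the switch above (a sketch; option names are standard PETSc):
   the nonzero-pattern options are forwarded to both sequential blocks,
   the symmetry options go to the diagonal block only, and e.g.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   merely sets donotstash, so each process must then set only rows it owns;
   MAT_USE_HASH_TABLE arms ht_flag for the next final assembly.
*/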
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
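/*
   Usage sketch for MatZeroRows_MPIBAIJ() below (collective; each process
   may list any global rows, owned or not, since the routine routes them to
   their owners; values are illustrative):

     PetscInt    rows[1] = {0};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,1,rows,diag);CHKERRQ(ierr);
*/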
nprocs[2*i-2];} 1608 count = 0; 1609 for (i=0; i<size; i++) { 1610 if (nprocs[2*i+1]) { 1611 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1612 } 1613 } 1614 ierr = PetscFree(starts);CHKERRQ(ierr); 1615 1616 base = owners[rank]; 1617 1618 /* wait on receives */ 1619 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1620 count = nrecvs; 1621 slen = 0; 1622 while (count) { 1623 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1624 /* unpack receives into our local space */ 1625 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1626 source[imdex] = recv_status.MPI_SOURCE; 1627 lens[imdex] = n; 1628 slen += n; 1629 count--; 1630 } 1631 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1632 1633 /* move the data into the send scatter */ 1634 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1635 count = 0; 1636 for (i=0; i<nrecvs; i++) { 1637 values = rvalues + i*nmax; 1638 for (j=0; j<lens[i]; j++) { 1639 lrows[count++] = values[j] - base; 1640 } 1641 } 1642 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1643 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1644 ierr = PetscFree(owner);CHKERRQ(ierr); 1645 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1646 1647 /* actually zap the local rows */ 1648 /* 1649 Zero the required rows. If the "diagonal block" of the matrix 1650 is square and the user wishes to set the diagonal, we use separate 1651 code so that MatSetValues() is not called for each diagonal entry, 1652 which would allocate new memory each time and slow things down with many mallocs. 1653 1654 */ 1655 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1656 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1657 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1658 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1659 } else if (diag != 0.0) { 1660 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1661 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1662 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1663 for (i=0; i<slen; i++) { 1664 row = lrows[i] + rstart_bs; 1665 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 } else { 1670 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1671 } 1672 1673 ierr = PetscFree(lrows);CHKERRQ(ierr); 1674 1675 /* wait on sends */ 1676 if (nsends) { 1677 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1678 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1679 ierr = PetscFree(send_status);CHKERRQ(ierr); 1680 } 1681 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1682 ierr = PetscFree(svalues);CHKERRQ(ierr); 1683 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1689 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1690 { 1691 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1692 PetscErrorCode ierr; 1693 1694 PetscFunctionBegin; 1695 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__
"MatEqual_MPIBAIJ" 1703 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1704 { 1705 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1706 Mat a,b,c,d; 1707 PetscTruth flg; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 a = matA->A; b = matA->B; 1712 c = matB->A; d = matB->B; 1713 1714 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1715 if (flg) { 1716 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1717 } 1718 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatCopy_MPIBAIJ" 1724 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1725 { 1726 PetscErrorCode ierr; 1727 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1728 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1729 1730 PetscFunctionBegin; 1731 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1732 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1733 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1734 } else { 1735 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1736 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1743 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1744 { 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1754 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1755 { 1756 PetscErrorCode ierr; 1757 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1758 PetscBLASInt bnz,one=1; 1759 Mat_SeqBAIJ *x,*y; 1760 1761 PetscFunctionBegin; 1762 if (str == SAME_NONZERO_PATTERN) { 1763 PetscScalar alpha = a; 1764 x = (Mat_SeqBAIJ *)xx->A->data; 1765 y = (Mat_SeqBAIJ *)yy->A->data; 1766 bnz = PetscBLASIntCast(x->nz); 1767 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1768 x = (Mat_SeqBAIJ *)xx->B->data; 1769 y = (Mat_SeqBAIJ *)yy->B->data; 1770 bnz = PetscBLASIntCast(x->nz); 1771 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1772 } else { 1773 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1774 } 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1780 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1781 { 1782 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1783 PetscInt rbs,cbs; 1784 PetscErrorCode ierr; 1785 1786 PetscFunctionBegin; 1787 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1788 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1789 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1790 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1791 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1792 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1793 PetscFunctionReturn(0); 1794 } 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1798 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1799 { 1800 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1805 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1806 
PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1811 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1812 { 1813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1818 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1824 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1825 { 1826 PetscErrorCode ierr; 1827 IS iscol_local; 1828 PetscInt csize; 1829 1830 PetscFunctionBegin; 1831 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1832 if (call == MAT_REUSE_MATRIX) { 1833 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1834 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1835 } else { 1836 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1837 } 1838 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1839 if (call == MAT_INITIAL_MATRIX) { 1840 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1841 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1842 } 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 1848 /* 1849 Not great since it makes two copies of the submatrix: first a sequential SeqBAIJ 1850 on each process, and then the final result assembled by concatenating the local matrices. 1851 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1852 */ 1853 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1854 { 1855 PetscErrorCode ierr; 1856 PetscMPIInt rank,size; 1857 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1858 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1859 Mat *local,M,Mreuse; 1860 MatScalar *vwork,*aa; 1861 MPI_Comm comm = ((PetscObject)mat)->comm; 1862 Mat_SeqBAIJ *aij; 1863 1864 1865 PetscFunctionBegin; 1866 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1867 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1868 1869 if (call == MAT_REUSE_MATRIX) { 1870 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1871 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1872 local = &Mreuse; 1873 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1874 } else { 1875 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1876 Mreuse = *local; 1877 ierr = PetscFree(local);CHKERRQ(ierr); 1878 } 1879 1880 /* 1881 m - number of local rows 1882 n - number of columns (same on all processors) 1883 rstart - first row in new global matrix generated 1884 */ 1885 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1886 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1887 m = m/bs; 1888 n = n/bs; 1889 1890 if (call == MAT_INITIAL_MATRIX) { 1891 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1892 ii = aij->i; 1893 jj = aij->j; 1894 1895 /* 1896 Determine the number of non-zeros in the diagonal and off-diagonal 1897 portions of the matrix in order to do correct preallocation 1898 */ 1899 1900 /* first get start and end of "diagonal" columns */ 1901 if (csize == PETSC_DECIDE) { 1902 ierr =
ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1903 if (mglobal == n*bs) { /* square matrix */ 1904 nlocal = m; 1905 } else { 1906 nlocal = n/size + ((n % size) > rank); 1907 } 1908 } else { 1909 nlocal = csize/bs; 1910 } 1911 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1912 rstart = rend - nlocal; 1913 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1914 1915 /* next, compute all the lengths */ 1916 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1917 olens = dlens + m; 1918 for (i=0; i<m; i++) { 1919 jend = ii[i+1] - ii[i]; 1920 olen = 0; 1921 dlen = 0; 1922 for (j=0; j<jend; j++) { 1923 if (*jj < rstart || *jj >= rend) olen++; 1924 else dlen++; 1925 jj++; 1926 } 1927 olens[i] = olen; 1928 dlens[i] = dlen; 1929 } 1930 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1931 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1932 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1933 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1934 ierr = PetscFree(dlens);CHKERRQ(ierr); 1935 } else { 1936 PetscInt ml,nl; 1937 1938 M = *newmat; 1939 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1940 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1941 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1942 /* 1943 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 1944 rather than the slower MatSetValues(). 1945 */ 1946 M->was_assembled = PETSC_TRUE; 1947 M->assembled = PETSC_FALSE; 1948 } 1949 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1950 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1951 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1952 ii = aij->i; 1953 jj = aij->j; 1954 aa = aij->a; 1955 for (i=0; i<m; i++) { 1956 row = rstart/bs + i; 1957 nz = ii[i+1] - ii[i]; 1958 cwork = jj; jj += nz; 1959 vwork = aa; aa += nz; 1960 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1961 } 1962 1963 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 *newmat = M; 1966 1967 /* save submatrix used in processor for next request */ 1968 if (call == MAT_INITIAL_MATRIX) { 1969 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1970 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1971 } 1972 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "MatPermute_MPIBAIJ" 1978 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1979 { 1980 MPI_Comm comm,pcomm; 1981 PetscInt first,local_size,nrows; 1982 const PetscInt *rows; 1983 PetscMPIInt size; 1984 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1989 /* make a collective version of 'rowp' */ 1990 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1991 if (pcomm==comm) { 1992 crowp = rowp; 1993 } else { 1994 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1995 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1996 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1997 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1998 } 1999 /* collect the global row permutation and invert it */ 2000 ierr =
ISAllGather(crowp,&growp);CHKERRQ(ierr); 2001 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2002 if (pcomm!=comm) { 2003 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2004 } 2005 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2006 /* get the local target indices */ 2007 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2008 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2009 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2010 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2011 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2012 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2013 /* the column permutation is so much easier; 2014 make a local version of 'colp' and invert it */ 2015 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2016 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2017 if (size==1) { 2018 lcolp = colp; 2019 } else { 2020 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2021 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2022 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2023 } 2024 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2025 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2026 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2027 if (size>1) { 2028 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2029 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2030 } 2031 /* now we just get the submatrix */ 2032 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2033 /* clean up */ 2034 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2035 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2041 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2042 { 2043 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2044 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2045 2046 PetscFunctionBegin; 2047 if (nghosts) { *nghosts = B->nbs;} 2048 if (ghosts) {*ghosts = baij->garray;} 2049 PetscFunctionReturn(0); 2050 } 2051 2052 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2056 /* 2057 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2058 */ 2059 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2060 { 2061 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2062 PetscErrorCode ierr; 2063 PetscMPIInt size,*ncolsonproc,*disp,nn; 2064 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2065 const PetscInt *is; 2066 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2067 PetscInt *rowhit,M,cstart,cend,colb; 2068 PetscInt *columnsforrow,l; 2069 IS *isa; 2070 PetscTruth done,flg; 2071 ISLocalToGlobalMapping map = mat->bmapping; 2072 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2073 2074 PetscFunctionBegin; 2075 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2076 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2077 2078 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2079 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2080 M = mat->rmap->n/bs; 2081 cstart = mat->cmap->rstart/bs; 2082 cend = mat->cmap->rend/bs; 2083 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2084 c->N = mat->cmap->N/bs; 2085 c->m = mat->rmap->n/bs; 2086 c->rstart = mat->rmap->rstart/bs; 2087 2088 c->ncolors = nis; 2089 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2090 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2091 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2093 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2094 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2095 2096 /* Allow access to data structures of local part of matrix */ 2097 if (!baij->colmap) { 2098 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2099 } 2100 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2101 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2102 2103 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2104 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2105 2106 for (i=0; i<nis; i++) { 2107 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2108 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2109 c->ncolumns[i] = n; 2110 if (n) { 2111 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2114 } else { 2115 c->columns[i] = 0; 2116 } 2117 2118 if (ctype == IS_COLORING_GLOBAL){ 2119 /* Determine the total (parallel) number of columns of this color */ 2120 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2121 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2122 2123 nn = PetscMPIIntCast(n); 2124 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2125 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2126 if (!nctot) { 2127 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2128 } 2129 2130 disp[0] = 0; 2131 for (j=1; j<size; j++) { 2132 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2133 } 2134 2135 /* Get complete list of columns for color on each processor */ 2136 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2137 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2138 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2139 } else if (ctype == IS_COLORING_GHOSTED){ 2140 /* Determine local number of columns of this color on this process, including ghost points */ 2141 nctot = n; 2142 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2143 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2144 } else { 2145 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2146 } 2147 2148 /* 2149 Mark all rows affected by these columns 2150 */ 2151 /* Temporary option to allow for debugging/testing */ 2152 flg = PETSC_FALSE; 2153 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2154 if (!flg) {/*-----------------------------------------------------------------------------*/ 2155 /* crude, fast version */ 2156 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2157 /* loop over columns*/ 2158 for (j=0; j<nctot; j++) { 2159 if (ctype == IS_COLORING_GHOSTED) { 2160 col = ltog[cols[j]]; 2161 } else { 2162 col = cols[j]; 2163 } 2164 if (col >= cstart && col < cend) { 2165 /* column is in diagonal block of matrix */ 2166 rows = A_cj + A_ci[col-cstart]; 2167 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2168 } else { 2169 #if defined (PETSC_USE_CTABLE) 2170 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2171 colb --; 2172 #else 2173 colb = baij->colmap[col] - 1; 2174 #endif 2175 if (colb == -1) { 2176 m = 0; 2177 } else { 2178 colb = colb/bs; 2179 rows = B_cj + B_ci[colb]; 2180 m = B_ci[colb+1] - B_ci[colb]; 2181 } 2182 } 2183 /* loop over the rows of this column, marking them in rowhit */ 2184 for (k=0; k<m; k++) { 2185 rowhit[*rows++] = col + 1; 2186 } 2187 } 2188 2189 /* count the number of hits */ 2190 nrows = 0; 2191 for (j=0; j<M; j++) { 2192 if (rowhit[j]) nrows++; 2193 } 2194 c->nrows[i] = nrows; 2195 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2196 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2197 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2198 nrows = 0; 2199 for (j=0; j<M; j++) { 2200 if (rowhit[j]) { 2201 c->rows[i][nrows] = j; 2202 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2203 nrows++; 2204 } 2205 } 2206 } else {/*-------------------------------------------------------------------------------*/ 2207 /* slow version, using rowhit as a linked list */ 2208 PetscInt currentcol,fm,mfm; 2209 rowhit[M] = M; 2210 nrows = 0; 2211 /* loop over columns*/ 2212 for (j=0; j<nctot; j++) { 2213 if (ctype == IS_COLORING_GHOSTED) { 2214 col = ltog[cols[j]]; 2215 } else { 2216 col = cols[j]; 2217 } 2218 if (col >= cstart && col < cend) { 2219 /* column is in diagonal block of matrix */ 2220 rows = A_cj + A_ci[col-cstart]; 2221 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2222 } else { 2223 #if defined (PETSC_USE_CTABLE) 2224 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2225 colb --; 2226 #else 2227 colb = baij->colmap[col] - 1; 2228 #endif 2229 if (colb == -1) { 2230 m = 0; 2231 } else { 2232 colb = colb/bs; 2233 rows = B_cj + B_ci[colb]; 2234 m = B_ci[colb+1] - B_ci[colb]; 2235 } 2236 } 2237 2238 /* loop over the rows of this column, marking them in rowhit */ 2239 fm = M; /* fm points to first entry in linked list */ 2240 for (k=0; k<m; k++) { 2241 currentcol = *rows++; 2242 /* is it already in the list?
*/ 2243 do { 2244 mfm = fm; 2245 fm = rowhit[fm]; 2246 } while (fm < currentcol); 2247 /* not in list so add it */ 2248 if (fm != currentcol) { 2249 nrows++; 2250 columnsforrow[currentcol] = col; 2251 /* next three lines insert new entry into linked list */ 2252 rowhit[mfm] = currentcol; 2253 rowhit[currentcol] = fm; 2254 fm = currentcol; 2255 /* fm points to present position in list since we know the columns are sorted */ 2256 } else { 2257 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2258 } 2259 } 2260 } 2261 c->nrows[i] = nrows; 2262 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2263 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2264 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2265 /* now store the linked list of rows into c->rows[i] */ 2266 nrows = 0; 2267 fm = rowhit[M]; 2268 do { 2269 c->rows[i][nrows] = fm; 2270 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2271 fm = rowhit[fm]; 2272 } while (fm < M); 2273 } /* ---------------------------------------------------------------------------------------*/ 2274 ierr = PetscFree(cols);CHKERRQ(ierr); 2275 } 2276 2277 /* Optimize by adding the vscale and scaleforrow[][] fields */ 2278 /* 2279 vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones 2280 */ 2281 if (ctype == IS_COLORING_GLOBAL) { 2282 PetscInt *garray; 2283 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2284 for (i=0; i<baij->B->cmap->n/bs; i++) { 2285 for (j=0; j<bs; j++) { 2286 garray[i*bs+j] = bs*baij->garray[i]+j; 2287 } 2288 } 2289 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2290 ierr = PetscFree(garray);CHKERRQ(ierr); 2291 CHKMEMQ; 2292 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2293 for (k=0; k<c->ncolors; k++) { 2294 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2295 for (l=0; l<c->nrows[k]; l++) { 2296 col = c->columnsforrow[k][l]; 2297 if (col >= cstart && col < cend) { 2298 /* column is in diagonal block of matrix */ 2299 colb = col - cstart; 2300 } else { 2301 /* column is in "off-processor" part */ 2302 #if defined (PETSC_USE_CTABLE) 2303 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2304 colb --; 2305 #else 2306 colb = baij->colmap[col] - 1; 2307 #endif 2308 colb = colb/bs; 2309 colb += cend - cstart; 2310 } 2311 c->vscaleforrow[k][l] = colb; 2312 } 2313 } 2314 } else if (ctype == IS_COLORING_GHOSTED) { 2315 /* Get gtol mapping */ 2316 PetscInt N = mat->cmap->N, *gtol; 2317 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2318 for (i=0; i<N; i++) gtol[i] = -1; 2319 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2320 2321 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2322 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2323 for (k=0; k<c->ncolors; k++) { 2324 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2325 for (l=0; l<c->nrows[k]; l++) { 2326 col = c->columnsforrow[k][l]; /* global column index */ 2327 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2328 } 2329 } 2330 ierr = PetscFree(gtol);CHKERRQ(ierr); 2331 } 2332 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2333 2334 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2335 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2336
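/* give back the column IJ structures of the diagonal (A) and off-diagonal (B) blocks obtained with MatGetColumnIJ() above */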
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2337 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2338 CHKMEMQ; 2339 PetscFunctionReturn(0); 2340 } 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2344 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2345 { 2346 Mat B; 2347 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2348 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2349 Mat_SeqAIJ *b; 2350 PetscErrorCode ierr; 2351 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2352 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2353 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2354 2355 PetscFunctionBegin; 2356 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2357 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2358 2359 /* ---------------------------------------------------------------- 2360 Tell every processor the number of nonzeros per row 2361 */ 2362 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2363 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2364 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2365 } 2366 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2367 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2368 displs = recvcounts + size; 2369 for (i=0; i<size; i++) { 2370 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2371 displs[i] = A->rmap->range[i]/bs; 2372 } 2373 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2374 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2375 #else 2376 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2377 #endif 2378 /* --------------------------------------------------------------- 2379 Create the sequential matrix of the same type as the local block diagonal 2380 */ 2381 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2382 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2383 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2384 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2385 b = (Mat_SeqAIJ *)B->data; 2386 2387 /*-------------------------------------------------------------------- 2388 Copy my part of matrix column indices over 2389 */ 2390 sendcount = ad->nz + bd->nz; 2391 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2392 a_jsendbuf = ad->j; 2393 b_jsendbuf = bd->j; 2394 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2395 cnt = 0; 2396 for (i=0; i<n; i++) { 2397 2398 /* put in lower diagonal portion */ 2399 m = bd->i[i+1] - bd->i[i]; 2400 while (m > 0) { 2401 /* is it above diagonal (in bd (compressed) numbering) */ 2402 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2403 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2404 m--; 2405 } 2406 2407 /* put in diagonal portion */ 2408 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2409 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2410 } 2411 2412 /* put in upper diagonal portion */ 2413 while (m-- > 0) { 2414 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2415 } 2416 } 2417 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2418 2419 /*-------------------------------------------------------------------- 2420 Gather all column indices to all processors 2421 */ 2422 for (i=0; i<size; i++) { 2423 recvcounts[i] = 0; 2424 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2425 recvcounts[i] += lens[j]; 2426 } 2427 } 2428 displs[0] = 0; 2429 for (i=1; i<size; i++) { 2430 displs[i] = displs[i-1] + recvcounts[i-1]; 2431 } 2432 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2433 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2434 #else 2435 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2436 #endif 2437 /*-------------------------------------------------------------------- 2438 Assemble the matrix into useable form (note numerical values not yet set) 2439 */ 2440 /* set the b->ilen (length of each row) values */ 2441 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2442 /* set the b->i indices */ 2443 b->i[0] = 0; 2444 for (i=1; i<=A->rmap->N/bs; i++) { 2445 b->i[i] = b->i[i-1] + lens[i-1]; 2446 } 2447 ierr = PetscFree(lens);CHKERRQ(ierr); 2448 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2450 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2451 2452 if (A->symmetric){ 2453 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2454 } else if (A->hermitian) { 2455 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2456 } else if (A->structurally_symmetric) { 2457 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2458 } 2459 *newmat = B; 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "MatSOR_MPIBAIJ" 2465 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2466 { 2467 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2468 PetscErrorCode ierr; 2469 Vec bb1 = 0; 2470 2471 PetscFunctionBegin; 2472 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2473 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2474 } 2475 2476 if (flag == SOR_APPLY_UPPER) { 2477 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2482 if (flag & SOR_ZERO_INITIAL_GUESS) { 2483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2484 its--; 2485 } 2486 2487 while (its--) { 2488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2490 2491 /* update rhs: bb1 = bb - B*x */ 2492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2494 2495 /* local sweep */ 2496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2497 } 2498 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2499 if (flag & SOR_ZERO_INITIAL_GUESS) { 2500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2501 its--; 2502 } 2503 while (its--) { 2504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2505 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2506 2507 /* update rhs: bb1 = bb - B*x */ 2508 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2509 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2510 2511 /* local sweep */ 2512 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2513 } 2514 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2515 if (flag & SOR_ZERO_INITIAL_GUESS) { 2516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2517 its--; 2518 } 2519 while (its--) { 2520 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2521 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2522 2523 /* update rhs: bb1 = bb - B*x */ 2524 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2525 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2526 2527 /* local sweep */ 2528 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2529 } 2530 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2531 2532 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2533 PetscFunctionReturn(0); 2534 } 2535 2536 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2537 2538 2539 /* -------------------------------------------------------------------*/ 2540 static struct _MatOps MatOps_Values = { 2541 MatSetValues_MPIBAIJ, 2542 MatGetRow_MPIBAIJ, 2543 MatRestoreRow_MPIBAIJ, 2544 MatMult_MPIBAIJ, 2545 /* 4*/ MatMultAdd_MPIBAIJ, 2546 MatMultTranspose_MPIBAIJ, 2547 MatMultTransposeAdd_MPIBAIJ, 2548 0, 2549 0, 2550 0, 2551 /*10*/ 0, 2552 0, 2553 0, 2554 MatSOR_MPIBAIJ, 2555 MatTranspose_MPIBAIJ, 2556 /*15*/ MatGetInfo_MPIBAIJ, 2557 MatEqual_MPIBAIJ, 2558 MatGetDiagonal_MPIBAIJ, 2559 MatDiagonalScale_MPIBAIJ, 2560 MatNorm_MPIBAIJ, 2561 /*20*/ MatAssemblyBegin_MPIBAIJ, 2562 MatAssemblyEnd_MPIBAIJ, 2563 MatSetOption_MPIBAIJ, 2564 MatZeroEntries_MPIBAIJ, 2565 /*24*/ MatZeroRows_MPIBAIJ, 2566 0, 2567 0, 2568 0, 2569 0, 2570 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2571 0, 2572 0, 2573 0, 2574 0, 2575 /*34*/ MatDuplicate_MPIBAIJ, 2576 0, 2577 0, 2578 0, 2579 0, 2580 /*39*/ MatAXPY_MPIBAIJ, 2581 MatGetSubMatrices_MPIBAIJ, 2582 MatIncreaseOverlap_MPIBAIJ, 2583 MatGetValues_MPIBAIJ, 2584 MatCopy_MPIBAIJ, 2585 /*44*/ 0, 2586 MatScale_MPIBAIJ, 2587 0, 2588 0, 2589 0, 2590 /*49*/ MatSetBlockSize_MPIBAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*54*/ MatFDColoringCreate_MPIBAIJ, 2596 0, 2597 MatSetUnfactored_MPIBAIJ, 2598 MatPermute_MPIBAIJ, 2599 MatSetValuesBlocked_MPIBAIJ, 2600 /*59*/ MatGetSubMatrix_MPIBAIJ, 2601 MatDestroy_MPIBAIJ, 2602 MatView_MPIBAIJ, 2603 0, 2604 0, 2605 /*64*/ 0, 2606 0, 2607 0, 2608 0, 2609 0, 2610 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*74*/ 0, 2616 MatFDColoringApply_BAIJ, 2617 0, 2618 0, 2619 0, 2620 /*79*/ 0, 2621 0, 2622 0, 2623 0, 2624 MatLoad_MPIBAIJ, 2625 /*84*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*89*/ 0, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*94*/ 0, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*99*/ 0, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*104*/0, 2646 MatRealPart_MPIBAIJ, 2647 MatImaginaryPart_MPIBAIJ, 2648 0, 2649 0, 2650 /*109*/0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2656 0, 2657 MatGetGhosts_MPIBAIJ, 2658 0, 2659 0, 2660 /*119*/0, 2661 0, 2662 0, 2663 0 
2664 }; 2665 2666 EXTERN_C_BEGIN 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2669 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2670 { 2671 PetscFunctionBegin; 2672 *a = ((Mat_MPIBAIJ *)A->data)->A; 2673 *iscopy = PETSC_FALSE; 2674 PetscFunctionReturn(0); 2675 } 2676 EXTERN_C_END 2677 2678 EXTERN_C_BEGIN 2679 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2680 EXTERN_C_END 2681 2682 EXTERN_C_BEGIN 2683 #undef __FUNCT__ 2684 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2685 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2686 { 2687 PetscInt m,rstart,cstart,cend; 2688 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2689 const PetscInt *JJ=0; 2690 PetscScalar *values=0; 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 2695 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2696 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2697 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2698 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2699 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2700 m = B->rmap->n/bs; 2701 rstart = B->rmap->rstart/bs; 2702 cstart = B->cmap->rstart/bs; 2703 cend = B->cmap->rend/bs; 2704 2705 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2706 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2707 for (i=0; i<m; i++) { 2708 nz = ii[i+1] - ii[i]; 2709 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2710 nz_max = PetscMax(nz_max,nz); 2711 JJ = jj + ii[i]; 2712 for (j=0; j<nz; j++) { 2713 if (*JJ >= cstart) break; 2714 JJ++; 2715 } 2716 d = 0; 2717 for (; j<nz; j++) { 2718 if (*JJ++ >= cend) break; 2719 d++; 2720 } 2721 d_nnz[i] = d; 2722 o_nnz[i] = nz - d; 2723 } 2724 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2725 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2726 2727 values = (PetscScalar*)V; 2728 if (!values) { 2729 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2730 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2731 } 2732 for (i=0; i<m; i++) { 2733 PetscInt row = i + rstart; 2734 PetscInt ncols = ii[i+1] - ii[i]; 2735 const PetscInt *icols = jj + ii[i]; 2736 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2737 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2738 } 2739 2740 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2741 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2742 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 2744 PetscFunctionReturn(0); 2745 } 2746 EXTERN_C_END 2747 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2750 /*@C 2751 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format 2752 (block compressed row). 2753 2754 Collective on MPI_Comm 2755 2756 Input Parameters: 2757 + A - the matrix . bs - the block size 2758 . i - the indices into j for the start of each local row (starts with zero) 2759 .
j - the column indices for each local row (starts with zero); these must be sorted for each row 2760 - v - optional values in the matrix 2761 2762 Level: developer 2763 2764 .keywords: matrix, block, aij, compressed row, sparse, parallel 2765 2766 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ 2767 @*/ 2768 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2769 { 2770 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2771 2772 PetscFunctionBegin; 2773 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2774 if (f) { 2775 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 2780 EXTERN_C_BEGIN 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2784 { 2785 Mat_MPIBAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt i, newbs = PetscAbs(bs); 2788 2789 PetscFunctionBegin; 2790 if (bs < 0) { 2791 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2792 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2793 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2794 bs = PetscAbs(bs); 2795 } 2796 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2797 bs = newbs; 2798 2799 2800 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2801 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2802 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2803 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2804 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2805 2806 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2807 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2808 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2809 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2810 2811 if (d_nnz) { 2812 for (i=0; i<B->rmap->n/bs; i++) { 2813 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 2814 } 2815 } 2816 if (o_nnz) { 2817 for (i=0; i<B->rmap->n/bs; i++) { 2818 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 2819 } 2820 } 2821 2822 b = (Mat_MPIBAIJ*)B->data; 2823 b->bs2 = bs*bs; 2824 b->mbs = B->rmap->n/bs; 2825 b->nbs = B->cmap->n/bs; 2826 b->Mbs = B->rmap->N/bs; 2827 b->Nbs = B->cmap->N/bs; 2828 2829 for (i=0; i<=b->size; i++) { 2830 b->rangebs[i] = B->rmap->range[i]/bs; 2831 } 2832 b->rstartbs = B->rmap->rstart/bs; 2833 b->rendbs = B->rmap->rend/bs; 2834 b->cstartbs = B->cmap->rstart/bs; 2835 b->cendbs = B->cmap->rend/bs; 2836 2837 if (!B->preallocated) { 2838 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2839 ierr =
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2840 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2842 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2843 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2844 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2846 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2847 } 2848 2849 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2850 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2851 B->preallocated = PETSC_TRUE; 2852 PetscFunctionReturn(0); 2853 } 2854 EXTERN_C_END 2855 2856 EXTERN_C_BEGIN 2857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2859 EXTERN_C_END 2860 2861 2862 EXTERN_C_BEGIN 2863 #undef __FUNCT__ 2864 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2865 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2866 { 2867 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2868 PetscErrorCode ierr; 2869 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2870 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2871 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2872 2873 PetscFunctionBegin; 2874 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2875 ii[0] = 0; 2876 CHKMEMQ; 2877 for (i=0; i<M; i++) { 2878 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2879 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2880 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2881 /* remove one from the count if the matrix has a diagonal entry in this row */ 2882 for (j=id[i]; j<id[i+1]; j++) { 2883 if (jd[j] == i) {ii[i+1]--;break;} 2884 } 2885 CHKMEMQ; 2886 } 2887 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2888 cnt = 0; 2889 for (i=0; i<M; i++) { 2890 for (j=io[i]; j<io[i+1]; j++) { 2891 if (garray[jo[j]] > rstart) break; 2892 jj[cnt++] = garray[jo[j]]; 2893 CHKMEMQ; 2894 } 2895 for (k=id[i]; k<id[i+1]; k++) { 2896 if (jd[k] != i) { 2897 jj[cnt++] = rstart + jd[k]; 2898 CHKMEMQ; 2899 } 2900 } 2901 for (;j<io[i+1]; j++) { 2902 jj[cnt++] = garray[jo[j]]; 2903 CHKMEMQ; 2904 } 2905 } 2906 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2907 PetscFunctionReturn(0); 2908 } 2909 EXTERN_C_END 2910 2911 EXTERN_C_BEGIN 2912 #if defined(PETSC_HAVE_MUMPS) 2913 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2914 #endif 2915 EXTERN_C_END 2916 2917 /*MC 2918 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2919 2920 Options Database Keys: 2921 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2922 .
-mat_block_size <bs> - set the blocksize used to store the matrix 2923 - -mat_use_hash_table <fact> 2924 2925 Level: beginner 2926 2927 .seealso: MatCreateMPIBAIJ 2928 M*/ 2929 2930 EXTERN_C_BEGIN 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatCreate_MPIBAIJ" 2933 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2934 { 2935 Mat_MPIBAIJ *b; 2936 PetscErrorCode ierr; 2937 PetscTruth flg; 2938 2939 PetscFunctionBegin; 2940 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2941 B->data = (void*)b; 2942 2943 2944 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2945 B->mapping = 0; 2946 B->assembled = PETSC_FALSE; 2947 2948 B->insertmode = NOT_SET_VALUES; 2949 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2950 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2951 2952 /* build local table of row and column ownerships */ 2953 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2954 2955 /* build cache for off array entries formed */ 2956 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2957 b->donotstash = PETSC_FALSE; 2958 b->colmap = PETSC_NULL; 2959 b->garray = PETSC_NULL; 2960 b->roworiented = PETSC_TRUE; 2961 2962 /* stuff used in block assembly */ 2963 b->barray = 0; 2964 2965 /* stuff used for matrix vector multiply */ 2966 b->lvec = 0; 2967 b->Mvctx = 0; 2968 2969 /* stuff for MatGetRow() */ 2970 b->rowindices = 0; 2971 b->rowvalues = 0; 2972 b->getrowactive = PETSC_FALSE; 2973 2974 /* hash table stuff */ 2975 b->ht = 0; 2976 b->hd = 0; 2977 b->ht_size = 0; 2978 b->ht_flag = PETSC_FALSE; 2979 b->ht_fact = 0; 2980 b->ht_total_ct = 0; 2981 b->ht_insert_ct = 0; 2982 2983 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2984 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2985 if (flg) { 2986 PetscReal fact = 1.39; 2987 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2988 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2989 if (fact <= 1.0) fact = 1.39; 2990 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2991 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2992 } 2993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2994 2995 #if defined(PETSC_HAVE_MUMPS) 2996 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2997 #endif 2998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2999 "MatConvert_MPIBAIJ_MPIAdj", 3000 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3002 "MatStoreValues_MPIBAIJ", 3003 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3005 "MatRetrieveValues_MPIBAIJ", 3006 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3008 "MatGetDiagonalBlock_MPIBAIJ", 3009 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3011 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3012 
MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3014 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3015 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3016 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3017 "MatDiagonalScaleLocal_MPIBAIJ", 3018 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3020 "MatSetHashTableFactor_MPIBAIJ", 3021 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3022 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 EXTERN_C_END 3026 3027 /*MC 3028 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3029 3030 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3031 and MATMPIBAIJ otherwise. 3032 3033 Options Database Keys: 3034 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3035 3036 Level: beginner 3037 3038 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3039 M*/ 3040 3041 EXTERN_C_BEGIN 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatCreate_BAIJ" 3044 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3045 { 3046 PetscErrorCode ierr; 3047 PetscMPIInt size; 3048 3049 PetscFunctionBegin; 3050 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3051 if (size == 1) { 3052 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3053 } else { 3054 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3055 } 3056 PetscFunctionReturn(0); 3057 } 3058 EXTERN_C_END 3059 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3062 /*@C 3063 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3064 (block compressed row). For good matrix assembly performance 3065 the user should preallocate the matrix storage by setting the parameters 3066 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3067 performance can be increased by more than a factor of 50. 3068 3069 Collective on Mat 3070 3071 Input Parameters: 3072 + A - the matrix 3073 . bs - size of block 3074 . d_nz - number of block nonzeros per block row in diagonal portion of local 3075 submatrix (same for all local rows) 3076 . d_nnz - array containing the number of block nonzeros in the various block rows 3077 in the diagonal portion of the local submatrix (possibly different for each block 3078 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3079 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3080 submatrix (same for all local rows). 3081 - o_nnz - array containing the number of nonzeros in the various block rows of the 3082 off-diagonal portion of the local submatrix (possibly different for 3083 each block row) or PETSC_NULL. 3084 3085 If the *_nnz parameter is given then the *_nz parameter is ignored 3086 3087 Options Database Keys: 3088 + -mat_block_size - size of the blocks to use 3089 - -mat_use_hash_table <fact> - use hash tables for matrix assembly, with the given factor 3090 3091 Notes: 3092 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3093 then it must be used on all processors that share the object for that argument.
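 A typical call sequence is sketched below; the block size 2 and the per-block-row counts 3 and 1 are illustrative placeholders, not recommendations, and m and n are the local sizes (which must be divisible by the block size):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve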
3094 3095 Storage Information: 3096 For a square global matrix we define each processor's diagonal portion 3097 to be its local rows and the corresponding columns (a square submatrix); 3098 each processor's off-diagonal portion encompasses the remainder of the 3099 local matrix (a rectangular submatrix). 3100 3101 The user can specify preallocated storage for the diagonal part of 3102 the local submatrix with either d_nz or d_nnz (not both). Set 3103 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3104 memory allocation. Likewise, specify preallocated storage for the 3105 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3106 3107 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3108 the figure below we depict these three local rows and all columns (0-11). 3109 3110 .vb 3111 0 1 2 3 4 5 6 7 8 9 10 11 3112 ------------------- 3113 row 3 | o o o d d d o o o o o o 3114 row 4 | o o o d d d o o o o o o 3115 row 5 | o o o d d d o o o o o o 3116 ------------------- 3117 .ve 3118 3119 Thus, any entries in the d locations are stored in the d (diagonal) 3120 submatrix, and any entries in the o locations are stored in the 3121 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3122 stored simply in the MATSEQBAIJ format for compressed row storage. 3123 3124 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3125 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3126 In general, for PDE problems in which most nonzeros are near the diagonal, 3127 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3128 or you will get TERRIBLE performance; see the users' manual chapter on 3129 matrices. 3130 3131 You can call MatGetInfo() to get information on how effective the preallocation was; 3132 for example, look at the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3133 You can also run with the option -info and look for messages with the string 3134 malloc in them to see if additional memory allocation was needed. 3135 3136 Level: intermediate 3137 3138 .keywords: matrix, block, aij, compressed row, sparse, parallel 3139 3140 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3141 @*/ 3142 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3143 { 3144 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3145 3146 PetscFunctionBegin; 3147 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3148 if (f) { 3149 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3150 } 3151 PetscFunctionReturn(0); 3152 } 3153 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatCreateMPIBAIJ" 3156 /*@C 3157 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3158 (block compressed row). For good matrix assembly performance 3159 the user should preallocate the matrix storage by setting the parameters 3160 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3161 performance can be increased by more than a factor of 50. 3162 3163 Collective on MPI_Comm 3164 3165 Input Parameters: 3166 + comm - MPI communicator 3167 . bs - size of block 3168 .
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.
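
   For instance, for the three local rows depicted above with bs=1 (in a
   run where every process owns three rows of the 12 x 12 matrix), and
   assuming every o location is actually nonzero, the call would be the
   following sketch (illustrative values only):

.vb
   ierr = MatCreateMPIBAIJ(comm,1,3,3,12,12,3,PETSC_NULL,9,PETSC_NULL,&A);CHKERRQ(ierr);
.ve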

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
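    /* without PETSC_USE_CTABLE the colmap is a dense array of length Nbs
       (the global number of block columns), so it is simply copied over */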
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If the global sizes are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If the global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }
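
  /*
     A worked illustration of the padding arithmetic (hypothetical numbers,
     not taken from any particular file): with M = 10 and bs = 4 we get
     Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 8 = 2; since extra_rows != bs,
     Mbs is bumped to 3 and the matrix is padded to 12 rows.  The padded
     rows receive a single 1.0 on the diagonal when the values are read in
     below.
  */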

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's column indices (except the last proc's) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processors' values (except the last one's) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
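
/*
   A minimal usage sketch (the file name is an illustrative placeholder):
   MatLoad_MPIBAIJ() is normally reached through the generic MatLoad()
   interface after the type has been set,

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   and the block size for the loaded matrix can be selected at run time
   with -matload_block_size <bs>.
*/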

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Notes:
   This can also be set by the command line option -mat_use_hash_table <fact>.

   Level: advanced

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
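
/*
   Example (a minimal sketch): retrieve the diagonal and off-diagonal
   sequential blocks of an MPIBAIJ matrix, together with the map from the
   off-diagonal block columns to global block columns,

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   where Ad and Ao are MATSEQBAIJ matrices and garray[i] gives the global
   block column of local block column i of Ao.
*/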

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }
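
        /* dispatch: block columns in [cstart,cend) belong to the diagonal
           part (baij->A) and use a local block column index; all others go
           to the off-diagonal part (baij->B), translating the global block
           column through the colmap once the matrix has been assembled */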
        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}