1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/ 4 #include "petscblaslapack.h" 5 6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat); 8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt); 9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]); 10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar); 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 20 { 21 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 22 PetscErrorCode ierr; 23 PetscInt i,*idxb = 0; 24 PetscScalar *va,*vb; 25 Vec vtmp; 26 27 PetscFunctionBegin; 28 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 29 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 30 if (idx) { 31 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++){ 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 41 } 42 43 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 44 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 45 if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);} 46 ierr = VecDestroy(vtmp);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 EXTERN_C_BEGIN 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat) 54 { 55 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 60 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 EXTERN_C_END 64 65 EXTERN_C_BEGIN 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 /* 81 Local utility routine that creates a mapping from the global column 82 number to the local number in the off-diagonal part of the local 83 storage of the matrix. This is done in a non scalable way since the 84 length of colmap equals the global matrix length. 
85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscTruth roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa=a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba=b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig){ 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_DEBUG) 235 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1); 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!baij->colmap) { 240 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 244 col = col - 1; 245 #else 246 col = baij->colmap[in[j]/bs] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 249 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 252 B = baij->B; 253 b = (Mat_SeqBAIJ*)(B)->data; 254 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 255 ba=b->a; 256 } else col += in[j]%bs; 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!baij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscTruth roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if(!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + i*(stepval+bs)*bs + j*bs; 316 } else { 317 value = v + j*(stepval+bs)*bs + i*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=stepval) { 320 for (jj=0; jj<bs; jj++) { 321 *barray++ = *value++; 322 } 323 } 324 barray -=bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ 
"MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscTruth roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if 
(roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: 
%D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = sqrt(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 
v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Creates the hash table, and sets the table 728 This table is created only once. 729 If new entried need to be added to the matrix 730 then the hash table has to be destroyed and 731 recreated. 732 */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 736 { 737 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 738 Mat A = baij->A,B=baij->B; 739 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 740 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 741 PetscErrorCode ierr; 742 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 743 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 744 PetscInt *HT,key; 745 MatScalar **HD; 746 PetscReal tmp; 747 #if defined(PETSC_USE_INFO) 748 PetscInt ct=0,max=0; 749 #endif 750 751 PetscFunctionBegin; 752 if (baij->ht) PetscFunctionReturn(0); 753 754 baij->ht_size = (PetscInt)(factor*nz); 755 ht_size = baij->ht_size; 756 757 /* Allocate Memory for Hash Table */ 758 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 759 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 761 HD = baij->hd; 762 HT = baij->ht; 763 764 /* Loop Over A */ 765 for (i=0; i<a->mbs; i++) { 766 for (j=ai[i]; j<ai[i+1]; j++) { 767 row = i+rstart; 768 col = aj[j]+cstart; 769 770 key = row*Nbs + col + 1; 771 h1 = HASH(ht_size,key,tmp); 772 for (k=0; k<ht_size; k++){ 773 if (!HT[(h1+k)%ht_size]) { 774 HT[(h1+k)%ht_size] = key; 775 HD[(h1+k)%ht_size] = a->a + j*bs2; 776 break; 777 #if defined(PETSC_USE_INFO) 778 } else { 779 ct++; 780 #endif 781 } 782 } 783 #if defined(PETSC_USE_INFO) 784 if (k> max) max = k; 785 #endif 786 } 787 } 788 /* Loop Over B */ 789 for (i=0; i<b->mbs; i++) { 790 for (j=bi[i]; j<bi[i+1]; j++) { 791 row = i+rstart; 792 col = garray[bj[j]]; 793 key = row*Nbs + col + 1; 794 h1 = HASH(ht_size,key,tmp); 795 for (k=0; k<ht_size; k++){ 796 if (!HT[(h1+k)%ht_size]) { 797 HT[(h1+k)%ht_size] = key; 798 HD[(h1+k)%ht_size] = b->a + j*bs2; 799 break; 800 #if defined(PETSC_USE_INFO) 801 } else { 802 ct++; 803 #endif 804 } 805 } 806 #if defined(PETSC_USE_INFO) 807 if (k> max) max = k; 808 #endif 809 } 810 } 811 812 /* Print Summary */ 813 #if defined(PETSC_USE_INFO) 814 for (i=0,j=0; i<ht_size; i++) { 815 if (HT[i]) {j++;} 816 } 817 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 818 #endif 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 825 { 826 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 827 PetscErrorCode ierr; 828 PetscInt nstash,reallocs; 829 InsertMode addv; 830 831 PetscFunctionBegin; 832 if (baij->donotstash) { 833 PetscFunctionReturn(0); 834 } 835 836 /* make sure all processors are either in INSERTMODE or ADDMODE */ 837 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 838 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 839 mat->insertmode = addv; /* in case this processor had no cache */ 840 841 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 842 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 846 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 853 { 854 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 855 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 856 PetscErrorCode ierr; 857 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 858 PetscInt *row,*col; 859 PetscTruth r1,r2,r3,other_disassembled; 860 MatScalar *val; 861 InsertMode addv = mat->insertmode; 862 PetscMPIInt n; 863 864 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 865 PetscFunctionBegin; 866 if (!baij->donotstash) { 867 while (1) { 868 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 869 if (!flg) break; 870 871 for (i=0; i<n;) { 872 /* Now identify the consecutive vals belonging to the same row */ 873 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 874 if (j < n) ncols = j-i; 875 else ncols = n-i; 876 /* Now assemble all these values with a single function call */ 877 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 882 /* Now process the block-stash. 
Since the values are stashed column-oriented, 883 set the roworiented flag to column oriented, and after MatSetValues() 884 restore the original flags */ 885 r1 = baij->roworiented; 886 r2 = a->roworiented; 887 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 888 baij->roworiented = PETSC_FALSE; 889 a->roworiented = PETSC_FALSE; 890 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 891 while (1) { 892 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 893 if (!flg) break; 894 895 for (i=0; i<n;) { 896 /* Now identify the consecutive vals belonging to the same row */ 897 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 898 if (j < n) ncols = j-i; 899 else ncols = n-i; 900 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 901 i = j; 902 } 903 } 904 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 905 baij->roworiented = r1; 906 a->roworiented = r2; 907 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 908 } 909 910 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 911 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 912 913 /* determine if any processor has disassembled, if so we must 914 also disassemble ourselfs, in order that we may reassemble. */ 915 /* 916 if nonzero structure of submatrix B cannot change then we know that 917 no processor disassembled thus we can skip this stuff 918 */ 919 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 920 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 921 if (mat->was_assembled && !other_disassembled) { 922 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 923 } 924 } 925 926 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 927 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 928 } 929 ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 930 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 932 933 #if defined(PETSC_USE_INFO) 934 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 935 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 936 baij->ht_total_ct = 0; 937 baij->ht_insert_ct = 0; 938 } 939 #endif 940 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 941 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 942 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 943 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 944 } 945 946 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 947 baij->rowvalues = 0; 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 954 { 955 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 956 PetscErrorCode ierr; 957 PetscMPIInt size = baij->size,rank = baij->rank; 958 PetscInt bs = mat->rmap->bs; 959 PetscTruth iascii,isdraw; 960 PetscViewer sviewer; 961 PetscViewerFormat format; 962 963 PetscFunctionBegin; 964 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 965 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 966 if (iascii) { 967 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 968 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 969 MatInfo info; 970 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 971 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 972 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 973 rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 974 mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 981 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } else if (format == PETSC_VIEWER_ASCII_INFO) { 984 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 987 PetscFunctionReturn(0); 988 } 989 } 990 991 if (isdraw) { 992 PetscDraw draw; 993 PetscTruth isnull; 994 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 995 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 996 } 997 998 if (size == 1) { 999 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1000 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1001 } else { 1002 /* assemble the entire matrix onto first processor. */ 1003 Mat A; 1004 Mat_SeqBAIJ *Aloc; 1005 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1006 MatScalar *a; 1007 1008 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1009 /* Perhaps this should be the type of mat? 
*/ 1010 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1011 if (!rank) { 1012 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1015 } 1016 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1017 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1061 } 1062 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1063 ierr = MatDestroy(A);CHKERRQ(ierr); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatView_MPIBAIJ" 1070 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1071 { 1072 PetscErrorCode ierr; 1073 PetscTruth iascii,isdraw,issocket,isbinary; 1074 1075 PetscFunctionBegin; 1076 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1077 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1078 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1079 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1080 if (iascii || isdraw || issocket || isbinary) { 1081 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1082 } else { 1083 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1090 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1091 { 1092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 #if defined(PETSC_USE_LOG) 1097 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1098 #endif 1099 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1100 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1101 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1102 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1103 #if defined (PETSC_USE_CTABLE) 1104 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1105 #else 1106 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1107 #endif 1108 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1109 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1110 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1111 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1112 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1113 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1114 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1115 ierr = PetscFree(baij);CHKERRQ(ierr); 1116 1117 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1118 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1119 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMult_MPIBAIJ" 1130 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 PetscInt nt; 1135 1136 PetscFunctionBegin; 1137 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1138 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1139 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1140 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1141 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1150 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1151 { 1152 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1165 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1166 { 1167 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1168 PetscErrorCode ierr; 1169 PetscTruth merged; 1170 1171 
PetscFunctionBegin; 1172 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1173 /* do nondiagonal part */ 1174 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1175 if (!merged) { 1176 /* send it on its way */ 1177 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 /* do local part */ 1179 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1180 /* receive remote parts: note this assumes the values are not actually */ 1181 /* inserted in yy until the next line */ 1182 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1183 } else { 1184 /* do local part */ 1185 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1186 /* send it on its way */ 1187 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1188 /* values actually were received in the Begin() but we need to call this nop */ 1189 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1190 } 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1196 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1197 { 1198 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 /* do nondiagonal part */ 1203 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1204 /* send it on its way */ 1205 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* receive remote parts: note this assumes the values are not actually */ 1209 /* inserted in yy until the next line, which is true for my implementation*/ 1210 /* but is not perhaps always true. 
*/ 1211 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 /* 1216 This only works correctly for square matrices where the subblock A->A is the 1217 diagonal block 1218 */ 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1221 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1222 { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1228 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatScale_MPIBAIJ" 1234 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1241 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1247 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1248 { 1249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1250 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1251 PetscErrorCode ierr; 1252 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1253 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1254 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1255 1256 PetscFunctionBegin; 1257 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1258 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1259 mat->getrowactive = PETSC_TRUE; 1260 1261 if (!mat->rowvalues && (idx || v)) { 1262 /* 1263 allocate enough space to hold information from the longest row. 
1264 */ 1265 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1266 PetscInt max = 1,mbs = mat->mbs,tmp; 1267 for (i=0; i<mbs; i++) { 1268 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1269 if (max < tmp) { max = tmp; } 1270 } 1271 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1272 } 1273 lrow = row - brstart; 1274 1275 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1276 if (!v) {pvA = 0; pvB = 0;} 1277 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1278 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1279 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1280 nztot = nzA + nzB; 1281 1282 cmap = mat->garray; 1283 if (v || idx) { 1284 if (nztot) { 1285 /* Sort by increasing column numbers, assuming A and B already sorted */ 1286 PetscInt imark = -1; 1287 if (v) { 1288 *v = v_p = mat->rowvalues; 1289 for (i=0; i<nzB; i++) { 1290 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1291 else break; 1292 } 1293 imark = i; 1294 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1295 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1296 } 1297 if (idx) { 1298 *idx = idx_p = mat->rowindices; 1299 if (imark > -1) { 1300 for (i=0; i<imark; i++) { 1301 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1302 } 1303 } else { 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]/bs] < cstart) 1306 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1307 else break; 1308 } 1309 imark = i; 1310 } 1311 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1312 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1313 } 1314 } else { 1315 if (idx) *idx = 0; 1316 if (v) *v = 0; 1317 } 1318 } 1319 *nz = nztot; 1320 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1321 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1327 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1330 1331 PetscFunctionBegin; 1332 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1333 baij->getrowactive = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1339 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1340 { 1341 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1346 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1352 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1353 { 1354 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1355 Mat A = a->A,B = a->B; 1356 PetscErrorCode ierr; 1357 PetscReal isend[5],irecv[5]; 1358 1359 PetscFunctionBegin; 1360 info->block_size = (PetscReal)matin->rmap->bs; 1361 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1362 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1363 isend[3] = info->memory; isend[4] = info->mallocs; 1364 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1365 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1366 isend[3] += 
info->memory; isend[4] += info->mallocs; 1367 if (flag == MAT_LOCAL) { 1368 info->nz_used = isend[0]; 1369 info->nz_allocated = isend[1]; 1370 info->nz_unneeded = isend[2]; 1371 info->memory = isend[3]; 1372 info->mallocs = isend[4]; 1373 } else if (flag == MAT_GLOBAL_MAX) { 1374 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1375 info->nz_used = irecv[0]; 1376 info->nz_allocated = irecv[1]; 1377 info->nz_unneeded = irecv[2]; 1378 info->memory = irecv[3]; 1379 info->mallocs = irecv[4]; 1380 } else if (flag == MAT_GLOBAL_SUM) { 1381 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1382 info->nz_used = irecv[0]; 1383 info->nz_allocated = irecv[1]; 1384 info->nz_unneeded = irecv[2]; 1385 info->memory = irecv[3]; 1386 info->mallocs = irecv[4]; 1387 } else { 1388 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1389 } 1390 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1391 info->fill_ratio_needed = 0; 1392 info->factor_mallocs = 0; 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1398 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 switch (op) { 1405 case MAT_NEW_NONZERO_LOCATIONS: 1406 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1407 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1408 case MAT_KEEP_NONZERO_PATTERN: 1409 case MAT_NEW_NONZERO_LOCATION_ERR: 1410 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1411 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1412 break; 1413 case MAT_ROW_ORIENTED: 1414 a->roworiented = flg; 1415 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1416 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1417 break; 1418 case MAT_NEW_DIAGONALS: 1419 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1420 break; 1421 case MAT_IGNORE_OFF_PROC_ENTRIES: 1422 a->donotstash = flg; 1423 break; 1424 case MAT_USE_HASH_TABLE: 1425 a->ht_flag = flg; 1426 break; 1427 case MAT_SYMMETRIC: 1428 case MAT_STRUCTURALLY_SYMMETRIC: 1429 case MAT_HERMITIAN: 1430 case MAT_SYMMETRY_ETERNAL: 1431 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1432 break; 1433 default: 1434 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1441 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1442 { 1443 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1444 Mat_SeqBAIJ *Aloc; 1445 Mat B; 1446 PetscErrorCode ierr; 1447 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1448 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1449 MatScalar *a; 1450 1451 PetscFunctionBegin; 1452 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1453 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1454 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1455 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1456 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1457 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1458 } else { 1459 B = *matout; 1460 } 1461 1462 /* copy over the A part */ 1463 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1464 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1465 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1466 1467 for (i=0; i<mbs; i++) { 1468 rvals[0] = bs*(baij->rstartbs + i); 1469 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1470 for (j=ai[i]; j<ai[i+1]; j++) { 1471 col = (baij->cstartbs+aj[j])*bs; 1472 for (k=0; k<bs; k++) { 1473 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1474 col++; a += bs; 1475 } 1476 } 1477 } 1478 /* copy over the B part */ 1479 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1480 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1481 for (i=0; i<mbs; i++) { 1482 rvals[0] = bs*(baij->rstartbs + i); 1483 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1484 for (j=ai[i]; j<ai[i+1]; j++) { 1485 col = baij->garray[aj[j]]*bs; 1486 for (k=0; k<bs; k++) { 1487 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1488 col++; a += bs; 1489 } 1490 } 1491 } 1492 ierr = PetscFree(rvals);CHKERRQ(ierr); 1493 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1494 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1495 1496 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1497 *matout = B; 1498 } else { 1499 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1507 { 1508 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1509 Mat a = baij->A,b = baij->B; 1510 PetscErrorCode ierr; 1511 PetscInt s1,s2,s3; 1512 1513 PetscFunctionBegin; 1514 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1515 if (rr) { 1516 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1517 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1518 /* Overlap communication with computation. 
*/ 1519 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 } 1521 if (ll) { 1522 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1523 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1524 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1525 } 1526 /* scale the diagonal block */ 1527 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1528 1529 if (rr) { 1530 /* Do a scatter end and then right scale the off-diagonal block */ 1531 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1533 } 1534 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1541 { 1542 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1545 PetscInt i,*owners = A->rmap->range; 1546 PetscInt *nprocs,j,idx,nsends,row; 1547 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1548 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1549 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1550 MPI_Comm comm = ((PetscObject)A)->comm; 1551 MPI_Request *send_waits,*recv_waits; 1552 MPI_Status recv_status,*send_status; 1553 #if defined(PETSC_DEBUG) 1554 PetscTruth found = PETSC_FALSE; 1555 #endif 1556 1557 PetscFunctionBegin; 1558 /* first count number of contributors to each processor */ 1559 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1560 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1561 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1562 j = 0; 1563 for (i=0; i<N; i++) { 1564 if (lastidx > (idx = rows[i])) j = 0; 1565 lastidx = idx; 1566 for (; j<size; j++) { 1567 if (idx >= owners[j] && idx < owners[j+1]) { 1568 nprocs[2*j]++; 1569 nprocs[2*j+1] = 1; 1570 owner[i] = j; 1571 #if defined(PETSC_DEBUG) 1572 found = PETSC_TRUE; 1573 #endif 1574 break; 1575 } 1576 } 1577 #if defined(PETSC_DEBUG) 1578 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1579 found = PETSC_FALSE; 1580 #endif 1581 } 1582 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1583 1584 /* inform other processors of number of messages and max length*/ 1585 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1586 1587 /* post receives: */ 1588 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1589 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1590 for (i=0; i<nrecvs; i++) { 1591 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1592 } 1593 1594 /* do sends: 1595 1) starts[i] gives the starting index in svalues for stuff going to 1596 the ith processor 1597 */ 1598 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1599 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1600 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1601 starts[0] = 0; 1602 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1603 for (i=0; i<N; i++) { 1604 svalues[starts[owner[i]]++] = rows[i]; 1605 } 1606 1607 starts[0] = 0; 1608 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + 
nprocs[2*i-2];} 1609 count = 0; 1610 for (i=0; i<size; i++) { 1611 if (nprocs[2*i+1]) { 1612 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1613 } 1614 } 1615 ierr = PetscFree(starts);CHKERRQ(ierr); 1616 1617 base = owners[rank]; 1618 1619 /* wait on receives */ 1620 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1621 count = nrecvs; 1622 slen = 0; 1623 while (count) { 1624 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1625 /* unpack receives into our local space */ 1626 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1627 source[imdex] = recv_status.MPI_SOURCE; 1628 lens[imdex] = n; 1629 slen += n; 1630 count--; 1631 } 1632 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1633 1634 /* move the data into the send scatter */ 1635 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1636 count = 0; 1637 for (i=0; i<nrecvs; i++) { 1638 values = rvalues + i*nmax; 1639 for (j=0; j<lens[i]; j++) { 1640 lrows[count++] = values[j] - base; 1641 } 1642 } 1643 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1644 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1645 ierr = PetscFree(owner);CHKERRQ(ierr); 1646 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1647 1648 /* actually zap the local rows */ 1649 /* 1650 Zero the required rows. If the "diagonal block" of the matrix 1651 is square and the user wishes to set the diagonal we use separate 1652 code so that MatSetValues() is not called for each diagonal allocating 1653 new memory, thus calling lots of mallocs and slowing things down. 1654 1655 */ 1656 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1657 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1658 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1659 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1660 } else if (diag != 0.0) { 1661 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1662 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1663 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1664 for (i=0; i<slen; i++) { 1665 row = lrows[i] + rstart_bs; 1666 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 } else { 1671 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1672 } 1673 1674 ierr = PetscFree(lrows);CHKERRQ(ierr); 1675 1676 /* wait on sends */ 1677 if (nsends) { 1678 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1679 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1680 ierr = PetscFree(send_status);CHKERRQ(ierr); 1681 } 1682 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1683 ierr = PetscFree(svalues);CHKERRQ(ierr); 1684 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1690 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1691 { 1692 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ 
"MatEqual_MPIBAIJ" 1704 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1705 { 1706 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1707 Mat a,b,c,d; 1708 PetscTruth flg; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 a = matA->A; b = matA->B; 1713 c = matB->A; d = matB->B; 1714 1715 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1716 if (flg) { 1717 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1718 } 1719 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatCopy_MPIBAIJ" 1725 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1726 { 1727 PetscErrorCode ierr; 1728 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1729 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1730 1731 PetscFunctionBegin; 1732 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1733 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1734 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1735 } else { 1736 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1737 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1738 } 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1744 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1745 { 1746 PetscErrorCode ierr; 1747 1748 PetscFunctionBegin; 1749 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1755 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1756 { 1757 PetscErrorCode ierr; 1758 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1759 PetscBLASInt bnz,one=1; 1760 Mat_SeqBAIJ *x,*y; 1761 1762 PetscFunctionBegin; 1763 if (str == SAME_NONZERO_PATTERN) { 1764 PetscScalar alpha = a; 1765 x = (Mat_SeqBAIJ *)xx->A->data; 1766 y = (Mat_SeqBAIJ *)yy->A->data; 1767 bnz = PetscBLASIntCast(x->nz); 1768 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1769 x = (Mat_SeqBAIJ *)xx->B->data; 1770 y = (Mat_SeqBAIJ *)yy->B->data; 1771 bnz = PetscBLASIntCast(x->nz); 1772 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1773 } else { 1774 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1781 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1782 { 1783 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1784 PetscInt rbs,cbs; 1785 PetscErrorCode ierr; 1786 1787 PetscFunctionBegin; 1788 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1789 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1790 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1791 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1792 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1793 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1799 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1800 { 1801 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1806 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1807 
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a sequential SeqBAIJ copy
  on each process, and then the final parallel matrix is assembled by concatenating those
  local matrices. Writing it directly would look much like MatGetSubMatrices_MPIBAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr =
ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1904 if (mglobal == n*bs) { /* square matrix */ 1905 nlocal = m; 1906 } else { 1907 nlocal = n/size + ((n % size) > rank); 1908 } 1909 } else { 1910 nlocal = csize/bs; 1911 } 1912 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1913 rstart = rend - nlocal; 1914 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1915 1916 /* next, compute all the lengths */ 1917 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1918 olens = dlens + m; 1919 for (i=0; i<m; i++) { 1920 jend = ii[i+1] - ii[i]; 1921 olen = 0; 1922 dlen = 0; 1923 for (j=0; j<jend; j++) { 1924 if (*jj < rstart || *jj >= rend) olen++; 1925 else dlen++; 1926 jj++; 1927 } 1928 olens[i] = olen; 1929 dlens[i] = dlen; 1930 } 1931 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1932 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1933 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1934 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1935 ierr = PetscFree(dlens);CHKERRQ(ierr); 1936 } else { 1937 PetscInt ml,nl; 1938 1939 M = *newmat; 1940 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1941 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1942 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1943 /* 1944 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1945 rather than the slower MatSetValues(). 1946 */ 1947 M->was_assembled = PETSC_TRUE; 1948 M->assembled = PETSC_FALSE; 1949 } 1950 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1951 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1952 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1953 ii = aij->i; 1954 jj = aij->j; 1955 aa = aij->a; 1956 for (i=0; i<m; i++) { 1957 row = rstart/bs + i; 1958 nz = ii[i+1] - ii[i]; 1959 cwork = jj; jj += nz; 1960 vwork = aa; aa += nz; 1961 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1962 } 1963 1964 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1966 *newmat = M; 1967 1968 /* save submatrix used in processor for next request */ 1969 if (call == MAT_INITIAL_MATRIX) { 1970 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1971 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1972 } 1973 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatPermute_MPIBAIJ" 1979 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1980 { 1981 MPI_Comm comm,pcomm; 1982 PetscInt first,local_size,nrows; 1983 const PetscInt *rows; 1984 PetscMPIInt size; 1985 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1990 /* make a collective version of 'rowp' */ 1991 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 1992 if (pcomm==comm) { 1993 crowp = rowp; 1994 } else { 1995 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 1996 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 1997 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 1998 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 1999 } 2000 /* collect the global row permutation and invert it */ 2001 ierr = 
         ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is much simpler;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) { *nghosts = B->nbs;}
  if (ghosts)  { *ghosts  = baij->garray;}
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to its AIJ counterpart, MatFDColoringCreate_MPIAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
  const PetscInt         *is;
  PetscInt               nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscTruth             done,flg;
  ISLocalToGlobalMapping map = mat->bmapping;
  PetscInt               *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2074 2075 PetscFunctionBegin; 2076 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2077 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2078 2079 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2080 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2081 M = mat->rmap->n/bs; 2082 cstart = mat->cmap->rstart/bs; 2083 cend = mat->cmap->rend/bs; 2084 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2085 c->N = mat->cmap->N/bs; 2086 c->m = mat->rmap->n/bs; 2087 c->rstart = mat->rmap->rstart/bs; 2088 2089 c->ncolors = nis; 2090 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2091 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2092 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2093 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2094 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2095 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2096 2097 /* Allow access to data structures of local part of matrix */ 2098 if (!baij->colmap) { 2099 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2100 } 2101 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2102 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2103 2104 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2105 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2106 2107 for (i=0; i<nis; i++) { 2108 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2109 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2110 c->ncolumns[i] = n; 2111 if (n) { 2112 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2113 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2115 } else { 2116 c->columns[i] = 0; 2117 } 2118 2119 if (ctype == IS_COLORING_GLOBAL){ 2120 /* Determine the total (parallel) number of columns of this color */ 2121 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2122 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2123 2124 nn = PetscMPIIntCast(n); 2125 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2126 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2127 if (!nctot) { 2128 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2129 } 2130 2131 disp[0] = 0; 2132 for (j=1; j<size; j++) { 2133 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2134 } 2135 2136 /* Get complete list of columns for color on each processor */ 2137 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2138 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2139 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2140 } else if (ctype == IS_COLORING_GHOSTED){ 2141 /* Determine local number of columns of this color on this process, including ghost points */ 2142 nctot = n; 2143 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2145 } else { 2146 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2147 } 2148 2149 /* 2150 Mark all rows affect by these columns 2151 */ 2152 /* Temporary option to allow for debugging/testing */ 2153 flg = PETSC_FALSE; 2154 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2155 if (!flg) {/*-----------------------------------------------------------------------------*/ 2156 /* crude, fast version */ 2157 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2158 /* loop over columns*/ 2159 for (j=0; j<nctot; j++) { 2160 if (ctype == IS_COLORING_GHOSTED) { 2161 col = ltog[cols[j]]; 2162 } else { 2163 col = cols[j]; 2164 } 2165 if (col >= cstart && col < cend) { 2166 /* column is in diagonal block of matrix */ 2167 rows = A_cj + A_ci[col-cstart]; 2168 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2169 } else { 2170 #if defined (PETSC_USE_CTABLE) 2171 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2172 colb --; 2173 #else 2174 colb = baij->colmap[col] - 1; 2175 #endif 2176 if (colb == -1) { 2177 m = 0; 2178 } else { 2179 colb = colb/bs; 2180 rows = B_cj + B_ci[colb]; 2181 m = B_ci[colb+1] - B_ci[colb]; 2182 } 2183 } 2184 /* loop over columns marking them in rowhit */ 2185 for (k=0; k<m; k++) { 2186 rowhit[*rows++] = col + 1; 2187 } 2188 } 2189 2190 /* count the number of hits */ 2191 nrows = 0; 2192 for (j=0; j<M; j++) { 2193 if (rowhit[j]) nrows++; 2194 } 2195 c->nrows[i] = nrows; 2196 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2197 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2198 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2199 nrows = 0; 2200 for (j=0; j<M; j++) { 2201 if (rowhit[j]) { 2202 c->rows[i][nrows] = j; 2203 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2204 nrows++; 2205 } 2206 } 2207 } else {/*-------------------------------------------------------------------------------*/ 2208 /* slow version, using rowhit as a linked list */ 2209 PetscInt currentcol,fm,mfm; 2210 rowhit[M] = M; 2211 nrows = 0; 2212 /* loop over columns*/ 2213 for (j=0; j<nctot; j++) { 2214 if (ctype == IS_COLORING_GHOSTED) { 2215 col = ltog[cols[j]]; 2216 } else { 2217 col = cols[j]; 2218 } 2219 if (col >= cstart && col < cend) { 2220 /* column is in diagonal block of matrix */ 2221 rows = A_cj + A_ci[col-cstart]; 2222 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2223 } else { 2224 #if defined (PETSC_USE_CTABLE) 2225 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2226 colb --; 2227 #else 2228 colb = baij->colmap[col] - 1; 2229 #endif 2230 if (colb == -1) { 2231 m = 0; 2232 } else { 2233 colb = colb/bs; 2234 rows = B_cj + B_ci[colb]; 2235 m = B_ci[colb+1] - B_ci[colb]; 2236 } 2237 } 2238 2239 /* loop over columns marking them in rowhit */ 2240 fm = M; /* fm points to first entry in linked list */ 2241 for (k=0; k<m; k++) { 2242 currentcol = *rows++; 2243 /* is it already in the list? 
          */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
          }
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm                           = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N, *gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    for (i=0; i<map->n; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];      /* global column index */
        c->vscaleforrow[k][l] = gtol[col]; /* local column index  */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2338 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2339 CHKMEMQ; 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2345 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2346 { 2347 Mat B; 2348 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2349 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2350 Mat_SeqAIJ *b; 2351 PetscErrorCode ierr; 2352 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2353 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2354 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2355 2356 PetscFunctionBegin; 2357 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2358 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2359 2360 /* ---------------------------------------------------------------- 2361 Tell every processor the number of nonzeros per row 2362 */ 2363 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2364 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2365 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2366 } 2367 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2368 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2369 displs = recvcounts + size; 2370 for (i=0; i<size; i++) { 2371 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2372 displs[i] = A->rmap->range[i]/bs; 2373 } 2374 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2375 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2376 #else 2377 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2378 #endif 2379 /* --------------------------------------------------------------- 2380 Create the sequential matrix of the same type as the local block diagonal 2381 */ 2382 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2383 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2384 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2385 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2386 b = (Mat_SeqAIJ *)B->data; 2387 2388 /*-------------------------------------------------------------------- 2389 Copy my part of matrix column indices over 2390 */ 2391 sendcount = ad->nz + bd->nz; 2392 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2393 a_jsendbuf = ad->j; 2394 b_jsendbuf = bd->j; 2395 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2396 cnt = 0; 2397 for (i=0; i<n; i++) { 2398 2399 /* put in lower diagonal portion */ 2400 m = bd->i[i+1] - bd->i[i]; 2401 while (m > 0) { 2402 /* is it above diagonal (in bd (compressed) numbering) */ 2403 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2404 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2405 m--; 2406 } 2407 2408 /* put in diagonal portion */ 2409 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2410 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2411 } 2412 2413 /* put in upper diagonal portion */ 2414 while (m-- > 0) { 2415 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2416 } 2417 } 2418 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2419 2420 /*-------------------------------------------------------------------- 2421 Gather all column indices to all processors 2422 */ 2423 for (i=0; i<size; i++) { 2424 recvcounts[i] = 0; 2425 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2426 recvcounts[i] += lens[j]; 2427 } 2428 } 2429 displs[0] = 0; 2430 for (i=1; i<size; i++) { 2431 displs[i] = displs[i-1] + recvcounts[i-1]; 2432 } 2433 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2434 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2435 #else 2436 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2437 #endif 2438 /*-------------------------------------------------------------------- 2439 Assemble the matrix into useable form (note numerical values not yet set) 2440 */ 2441 /* set the b->ilen (length of each row) values */ 2442 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2443 /* set the b->i indices */ 2444 b->i[0] = 0; 2445 for (i=1; i<=A->rmap->N/bs; i++) { 2446 b->i[i] = b->i[i-1] + lens[i-1]; 2447 } 2448 ierr = PetscFree(lens);CHKERRQ(ierr); 2449 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2450 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2451 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2452 2453 if (A->symmetric){ 2454 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2455 } else if (A->hermitian) { 2456 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2457 } else if (A->structurally_symmetric) { 2458 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2459 } 2460 *newmat = B; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSOR_MPIBAIJ" 2466 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2467 { 2468 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2469 PetscErrorCode ierr; 2470 Vec bb1 = 0; 2471 2472 PetscFunctionBegin; 2473 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2474 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2475 } 2476 2477 if (flag == SOR_APPLY_UPPER) { 2478 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2483 if (flag & SOR_ZERO_INITIAL_GUESS) { 2484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2485 its--; 2486 } 2487 2488 while (its--) { 2489 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2490 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2491 2492 /* update rhs: bb1 = bb - B*x */ 2493 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2494 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2495 2496 /* local sweep */ 2497 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2498 } 2499 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2500 if (flag & SOR_ZERO_INITIAL_GUESS) { 2501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2502 its--; 2503 } 2504 while (its--) { 2505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2506 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 2508 /* update rhs: bb1 = bb - B*x */ 2509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2511 2512 /* local sweep */ 2513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2514 } 2515 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2516 if (flag & SOR_ZERO_INITIAL_GUESS) { 2517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2518 its--; 2519 } 2520 while (its--) { 2521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2522 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 2524 /* update rhs: bb1 = bb - B*x */ 2525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2527 2528 /* local sweep */ 2529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2530 } 2531 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2532 2533 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2534 PetscFunctionReturn(0); 2535 } 2536 2537 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2538 2539 2540 /* -------------------------------------------------------------------*/ 2541 static struct _MatOps MatOps_Values = { 2542 MatSetValues_MPIBAIJ, 2543 MatGetRow_MPIBAIJ, 2544 MatRestoreRow_MPIBAIJ, 2545 MatMult_MPIBAIJ, 2546 /* 4*/ MatMultAdd_MPIBAIJ, 2547 MatMultTranspose_MPIBAIJ, 2548 MatMultTransposeAdd_MPIBAIJ, 2549 0, 2550 0, 2551 0, 2552 /*10*/ 0, 2553 0, 2554 0, 2555 MatSOR_MPIBAIJ, 2556 MatTranspose_MPIBAIJ, 2557 /*15*/ MatGetInfo_MPIBAIJ, 2558 MatEqual_MPIBAIJ, 2559 MatGetDiagonal_MPIBAIJ, 2560 MatDiagonalScale_MPIBAIJ, 2561 MatNorm_MPIBAIJ, 2562 /*20*/ MatAssemblyBegin_MPIBAIJ, 2563 MatAssemblyEnd_MPIBAIJ, 2564 MatSetOption_MPIBAIJ, 2565 MatZeroEntries_MPIBAIJ, 2566 /*24*/ MatZeroRows_MPIBAIJ, 2567 0, 2568 0, 2569 0, 2570 0, 2571 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2572 0, 2573 0, 2574 0, 2575 0, 2576 /*34*/ MatDuplicate_MPIBAIJ, 2577 0, 2578 0, 2579 0, 2580 0, 2581 /*39*/ MatAXPY_MPIBAIJ, 2582 MatGetSubMatrices_MPIBAIJ, 2583 MatIncreaseOverlap_MPIBAIJ, 2584 MatGetValues_MPIBAIJ, 2585 MatCopy_MPIBAIJ, 2586 /*44*/ 0, 2587 MatScale_MPIBAIJ, 2588 0, 2589 0, 2590 0, 2591 /*49*/ MatSetBlockSize_MPIBAIJ, 2592 0, 2593 0, 2594 0, 2595 0, 2596 /*54*/ MatFDColoringCreate_MPIBAIJ, 2597 0, 2598 MatSetUnfactored_MPIBAIJ, 2599 MatPermute_MPIBAIJ, 2600 MatSetValuesBlocked_MPIBAIJ, 2601 /*59*/ MatGetSubMatrix_MPIBAIJ, 2602 MatDestroy_MPIBAIJ, 2603 MatView_MPIBAIJ, 2604 0, 2605 0, 2606 /*64*/ 0, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /*74*/ 0, 2617 MatFDColoringApply_BAIJ, 2618 0, 2619 0, 2620 0, 2621 /*79*/ 0, 2622 0, 2623 0, 2624 0, 2625 MatLoad_MPIBAIJ, 2626 /*84*/ 0, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /*89*/ 0, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /*94*/ 0, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*99*/ 0, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /*104*/0, 2647 MatRealPart_MPIBAIJ, 2648 MatImaginaryPart_MPIBAIJ, 2649 0, 2650 0, 2651 /*109*/0, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2657 0, 2658 MatGetGhosts_MPIBAIJ 2659 }; 2660 2661 EXTERN_C_BEGIN 2662 #undef __FUNCT__ 
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ
   (block AIJ, block compressed row) format, given a CSR-style description of the local block rows.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row 2755 - v - optional values in the matrix 2756 2757 Level: developer 2758 2759 .keywords: matrix, aij, compressed row, sparse, parallel 2760 2761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2762 @*/ 2763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2764 { 2765 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2766 2767 PetscFunctionBegin; 2768 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2769 if (f) { 2770 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2771 } 2772 PetscFunctionReturn(0); 2773 } 2774 2775 EXTERN_C_BEGIN 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2778 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2779 { 2780 Mat_MPIBAIJ *b; 2781 PetscErrorCode ierr; 2782 PetscInt i, newbs = PetscAbs(bs); 2783 2784 PetscFunctionBegin; 2785 if (bs < 0) { 2786 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2787 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2788 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2789 bs = PetscAbs(bs); 2790 } 2791 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2792 bs = newbs; 2793 2794 2795 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2796 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2797 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2798 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2799 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2800 2801 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2802 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2803 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2805 2806 if (d_nnz) { 2807 for (i=0; i<B->rmap->n/bs; i++) { 2808 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2809 } 2810 } 2811 if (o_nnz) { 2812 for (i=0; i<B->rmap->n/bs; i++) { 2813 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2814 } 2815 } 2816 2817 b = (Mat_MPIBAIJ*)B->data; 2818 b->bs2 = bs*bs; 2819 b->mbs = B->rmap->n/bs; 2820 b->nbs = B->cmap->n/bs; 2821 b->Mbs = B->rmap->N/bs; 2822 b->Nbs = B->cmap->N/bs; 2823 2824 for (i=0; i<=b->size; i++) { 2825 b->rangebs[i] = B->rmap->range[i]/bs; 2826 } 2827 b->rstartbs = B->rmap->rstart/bs; 2828 b->rendbs = B->rmap->rend/bs; 2829 b->cstartbs = B->cmap->rstart/bs; 2830 b->cendbs = B->cmap->rend/bs; 2831 2832 if (!B->preallocated) { 2833 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2834 ierr = 
MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2835 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2837 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2838 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2839 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2841 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2842 } 2843 2844 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2845 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2846 B->preallocated = PETSC_TRUE; 2847 PetscFunctionReturn(0); 2848 } 2849 EXTERN_C_END 2850 2851 EXTERN_C_BEGIN 2852 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2853 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2854 EXTERN_C_END 2855 2856 2857 EXTERN_C_BEGIN 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2860 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2861 { 2862 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2863 PetscErrorCode ierr; 2864 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2865 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2866 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2867 2868 PetscFunctionBegin; 2869 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2870 ii[0] = 0; 2871 CHKMEMQ; 2872 for (i=0; i<M; i++) { 2873 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2874 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2875 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2876 /* remove one from count of matrix has diagonal */ 2877 for (j=id[i]; j<id[i+1]; j++) { 2878 if (jd[j] == i) {ii[i+1]--;break;} 2879 } 2880 CHKMEMQ; 2881 } 2882 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2883 cnt = 0; 2884 for (i=0; i<M; i++) { 2885 for (j=io[i]; j<io[i+1]; j++) { 2886 if (garray[jo[j]] > rstart) break; 2887 jj[cnt++] = garray[jo[j]]; 2888 CHKMEMQ; 2889 } 2890 for (k=id[i]; k<id[i+1]; k++) { 2891 if (jd[k] != i) { 2892 jj[cnt++] = rstart + jd[k]; 2893 CHKMEMQ; 2894 } 2895 } 2896 for (;j<io[i+1]; j++) { 2897 jj[cnt++] = garray[jo[j]]; 2898 CHKMEMQ; 2899 } 2900 } 2901 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 EXTERN_C_END 2905 2906 EXTERN_C_BEGIN 2907 #if defined(PETSC_HAVE_MUMPS) 2908 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2909 #endif 2910 EXTERN_C_END 2911 2912 /*MC 2913 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2914 2915 Options Database Keys: 2916 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2917 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2918 - -mat_use_hash_table <fact> 2919 2920 Level: beginner 2921 2922 .seealso: MatCreateMPIBAIJ 2923 M*/ 2924 2925 EXTERN_C_BEGIN 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatCreate_MPIBAIJ" 2928 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2929 { 2930 Mat_MPIBAIJ *b; 2931 PetscErrorCode ierr; 2932 PetscTruth flg; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2936 B->data = (void*)b; 2937 2938 2939 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2940 B->mapping = 0; 2941 B->assembled = PETSC_FALSE; 2942 2943 B->insertmode = NOT_SET_VALUES; 2944 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2945 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2946 2947 /* build local table of row and column ownerships */ 2948 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2949 2950 /* build cache for off array entries formed */ 2951 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2952 b->donotstash = PETSC_FALSE; 2953 b->colmap = PETSC_NULL; 2954 b->garray = PETSC_NULL; 2955 b->roworiented = PETSC_TRUE; 2956 2957 /* stuff used in block assembly */ 2958 b->barray = 0; 2959 2960 /* stuff used for matrix vector multiply */ 2961 b->lvec = 0; 2962 b->Mvctx = 0; 2963 2964 /* stuff for MatGetRow() */ 2965 b->rowindices = 0; 2966 b->rowvalues = 0; 2967 b->getrowactive = PETSC_FALSE; 2968 2969 /* hash table stuff */ 2970 b->ht = 0; 2971 b->hd = 0; 2972 b->ht_size = 0; 2973 b->ht_flag = PETSC_FALSE; 2974 b->ht_fact = 0; 2975 b->ht_total_ct = 0; 2976 b->ht_insert_ct = 0; 2977 2978 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2979 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2980 if (flg) { 2981 PetscReal fact = 1.39; 2982 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2983 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2984 if (fact <= 1.0) fact = 1.39; 2985 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2986 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2987 } 2988 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2989 2990 #if defined(PETSC_HAVE_MUMPS) 2991 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 2992 #endif 2993 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 2994 "MatConvert_MPIBAIJ_MPIAdj", 2995 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2996 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2997 "MatStoreValues_MPIBAIJ", 2998 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2999 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3000 "MatRetrieveValues_MPIBAIJ", 3001 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3003 "MatGetDiagonalBlock_MPIBAIJ", 3004 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3005 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3006 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3007 
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
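   Example usage (a minimal sketch, not taken from this file; the communicator comm, the local sizes
   and the per-block-row counts below are illustrative assumptions, and error checking with ierr/CHKERRQ()
   is omitted for brevity):
.vb
   Mat      A;
   PetscInt bs       = 2;            /* 2x2 blocks                                   */
   PetscInt d_nnz[3] = {3,2,1};      /* block nonzeros per block row, diagonal part  */
   PetscInt o_nnz[3] = {1,0,2};      /* block nonzeros per block row, off-diagonal   */

   MatCreate(comm,&A);
   MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);  /* 3 block rows owned locally */
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
   /* ... MatSetValuesBlocked()/MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() follow ... */
.ve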
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           --------------------------
   row 3  | o o o d d d o o o o o  o
   row 4  | o o o d d d o o o o o  o
   row 5  | o o o d d d o o o o o  o
           --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3164 This value should be the same as the local size used in creating the 3165 y vector for the matrix-vector product y = Ax. 3166 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 3167 This value should be the same as the local size used in creating the 3168 x vector for the matrix-vector product y = Ax. 3169 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3170 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3171 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3172 submatrix (same for all local rows) 3173 . d_nnz - array containing the number of nonzero blocks in the various block rows 3174 in the diagonal portion of the local submatrix (possibly different for each block 3175 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3176 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3177 submatrix (same for all local rows). 3178 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3179 off-diagonal portion of the local submatrix (possibly different for 3180 each block row) or PETSC_NULL. 3181 3182 Output Parameter: 3183 . A - the matrix 3184 3185 Options Database Keys: 3186 + -mat_block_size - size of the blocks to use 3187 - -mat_use_hash_table <fact> - use hash tables for matrix assembly; <fact> is the factor used to compute the hash table size 3188 3189 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3190 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3191 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3192 3193 Notes: 3194 If the *_nnz parameter is given then the *_nz parameter is ignored 3195 3196 A nonzero block is any block that has 1 or more nonzeros in it 3197 3198 The user MUST specify either the local or global matrix dimensions 3199 (possibly both). 3200 3201 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3202 then it must be used on all processors that share the object for that argument. 3203 3204 Storage Information: 3205 For a square global matrix we define each processor's diagonal portion 3206 to be its local rows and the corresponding columns (a square submatrix); 3207 each processor's off-diagonal portion encompasses the remainder of the 3208 local matrix (a rectangular submatrix). 3209 3210 The user can specify preallocated storage for the diagonal part of 3211 the local submatrix with either d_nz or d_nnz (not both). Set 3212 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3213 memory allocation. Likewise, specify preallocated storage for the 3214 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3215 3216 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3217 the figure below we depict these three local rows and all columns (0-11). 3218 3219 .vb 3220 0 1 2 3 4 5 6 7 8 9 10 11 3221 ------------------- 3222 row 3 | o o o d d d o o o o o o 3223 row 4 | o o o d d d o o o o o o 3224 row 5 | o o o d d d o o o o o o 3225 ------------------- 3226 .ve 3227 3228 Thus, any entries in the d locations are stored in the d (diagonal) 3229 submatrix, and any entries in the o locations are stored in the 3230 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3231 stored simply in the MATSEQBAIJ format for compressed row storage.
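
   For instance, if the block size is 3, the three rows shown above form a single block row;
   assuming every marked location is actually nonzero, that block row contains one diagonal
   block (columns 3-5) and three off-diagonal blocks (columns 0-2, 6-8 and 9-11), so a call
   such as the sketch below (whose preallocation values are purely illustrative) would suffice.
.vb
   ierr = MatCreateMPIBAIJ(comm,3,m,m,M,M,1,PETSC_NULL,3,PETSC_NULL,&A);CHKERRQ(ierr);
.ve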
3232 3233 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3234 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3235 In general, for PDE problems in which most nonzeros are near the diagonal, 3236 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3237 or you will get TERRIBLE performance; see the users' manual chapter on 3238 matrices. 3239 3240 Level: intermediate 3241 3242 .keywords: matrix, block, aij, compressed row, sparse, parallel 3243 3244 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3245 @*/ 3246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3247 { 3248 PetscErrorCode ierr; 3249 PetscMPIInt size; 3250 3251 PetscFunctionBegin; 3252 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3253 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3255 if (size > 1) { 3256 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3257 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3258 } else { 3259 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3260 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3261 } 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3267 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3268 { 3269 Mat mat; 3270 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3271 PetscErrorCode ierr; 3272 PetscInt len=0; 3273 3274 PetscFunctionBegin; 3275 *newmat = 0; 3276 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3277 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3278 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3279 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3280 3281 mat->factortype = matin->factortype; 3282 mat->preallocated = PETSC_TRUE; 3283 mat->assembled = PETSC_TRUE; 3284 mat->insertmode = NOT_SET_VALUES; 3285 3286 a = (Mat_MPIBAIJ*)mat->data; 3287 mat->rmap->bs = matin->rmap->bs; 3288 a->bs2 = oldmat->bs2; 3289 a->mbs = oldmat->mbs; 3290 a->nbs = oldmat->nbs; 3291 a->Mbs = oldmat->Mbs; 3292 a->Nbs = oldmat->Nbs; 3293 3294 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3295 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3296 3297 a->size = oldmat->size; 3298 a->rank = oldmat->rank; 3299 a->donotstash = oldmat->donotstash; 3300 a->roworiented = oldmat->roworiented; 3301 a->rowindices = 0; 3302 a->rowvalues = 0; 3303 a->getrowactive = PETSC_FALSE; 3304 a->barray = 0; 3305 a->rstartbs = oldmat->rstartbs; 3306 a->rendbs = oldmat->rendbs; 3307 a->cstartbs = oldmat->cstartbs; 3308 a->cendbs = oldmat->cendbs; 3309 3310 /* hash table stuff */ 3311 a->ht = 0; 3312 a->hd = 0; 3313 a->ht_size = 0; 3314 a->ht_flag = oldmat->ht_flag; 3315 a->ht_fact = oldmat->ht_fact; 3316 a->ht_total_ct = 0; 3317 a->ht_insert_ct = 0; 3318 3319 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3320 if (oldmat->colmap) { 3321 #if defined (PETSC_USE_CTABLE) 3322 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3323 #else 3324 ierr = 
PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3325 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3326 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3327 #endif 3328 } else a->colmap = 0; 3329 3330 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3331 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3332 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3333 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3334 } else a->garray = 0; 3335 3336 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3337 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3338 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3339 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3340 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3341 3342 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3343 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3344 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3346 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3347 *newmat = mat; 3348 3349 PetscFunctionReturn(0); 3350 } 3351 3352 #undef __FUNCT__ 3353 #define __FUNCT__ "MatLoad_MPIBAIJ" 3354 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3355 { 3356 Mat A; 3357 PetscErrorCode ierr; 3358 int fd; 3359 PetscInt i,nz,j,rstart,rend; 3360 PetscScalar *vals,*buf; 3361 MPI_Comm comm = ((PetscObject)viewer)->comm; 3362 MPI_Status status; 3363 PetscMPIInt rank,size,maxnz; 3364 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3365 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3366 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3367 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3368 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3369 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3370 3371 PetscFunctionBegin; 3372 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3373 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3374 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3375 3376 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3377 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3378 if (!rank) { 3379 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3380 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3381 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3382 } 3383 3384 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3385 M = header[1]; N = header[2]; 3386 3387 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3388 3389 /* 3390 This code adds extra rows to make sure the number of rows is 3391 divisible by the blocksize 3392 */ 3393 Mbs = M/bs; 3394 extra_rows = bs - M + bs*Mbs; 3395 if (extra_rows == bs) extra_rows = 0; 3396 else Mbs++; 3397 if (extra_rows && !rank) { 3398 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3399 } 3400 3401 /* determine ownership of all rows */ 
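 /* block rows are divided as evenly as possible: each process gets Mbs/size block rows, and the first Mbs % size ranks take one extra block row */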
3402 mbs = Mbs/size + ((Mbs % size) > rank); 3403 m = mbs*bs; 3404 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3405 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3406 3407 /* process 0 needs enough room for process with most rows */ 3408 if (!rank) { 3409 mmax = rowners[1]; 3410 for (i=2; i<size; i++) { 3411 mmax = PetscMax(mmax,rowners[i]); 3412 } 3413 mmax*=bs; 3414 } else mmax = m; 3415 3416 rowners[0] = 0; 3417 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3418 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3419 rstart = rowners[rank]; 3420 rend = rowners[rank+1]; 3421 3422 /* distribute row lengths to all processors */ 3423 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3424 if (!rank) { 3425 mend = m; 3426 if (size == 1) mend = mend - extra_rows; 3427 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3428 for (j=mend; j<m; j++) locrowlens[j] = 1; 3429 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3430 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3431 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3432 for (j=0; j<m; j++) { 3433 procsnz[0] += locrowlens[j]; 3434 } 3435 for (i=1; i<size; i++) { 3436 mend = browners[i+1] - browners[i]; 3437 if (i == size-1) mend = mend - extra_rows; 3438 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3439 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3440 /* calculate the number of nonzeros on each processor */ 3441 for (j=0; j<browners[i+1]-browners[i]; j++) { 3442 procsnz[i] += rowlengths[j]; 3443 } 3444 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3445 } 3446 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3447 } else { 3448 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3449 } 3450 3451 if (!rank) { 3452 /* determine max buffer needed and allocate it */ 3453 maxnz = procsnz[0]; 3454 for (i=1; i<size; i++) { 3455 maxnz = PetscMax(maxnz,procsnz[i]); 3456 } 3457 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3458 3459 /* read in my part of the matrix column indices */ 3460 nz = procsnz[0]; 3461 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3462 mycols = ibuf; 3463 if (size == 1) nz -= extra_rows; 3464 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3465 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3466 3467 /* read in every ones (except the last) and ship off */ 3468 for (i=1; i<size-1; i++) { 3469 nz = procsnz[i]; 3470 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3471 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3472 } 3473 /* read in the stuff for the last proc */ 3474 if (size != 1) { 3475 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3476 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3477 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3478 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3479 } 3480 ierr = PetscFree(cols);CHKERRQ(ierr); 3481 } else { 3482 /* determine buffer space needed for message */ 3483 nz = 0; 3484 for (i=0; i<m; i++) { 3485 nz += locrowlens[i]; 3486 } 3487 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3488 mycols = ibuf; 3489 /* receive message of column indices*/ 3490 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 
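 /* sanity check below: the number of indices actually received must match the total computed from the local row lengths */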
3491 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3492 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3493 } 3494 3495 /* loop over local rows, determining number of off diagonal entries */ 3496 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3497 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3498 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3499 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3500 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3501 rowcount = 0; nzcount = 0; 3502 for (i=0; i<mbs; i++) { 3503 dcount = 0; 3504 odcount = 0; 3505 for (j=0; j<bs; j++) { 3506 kmax = locrowlens[rowcount]; 3507 for (k=0; k<kmax; k++) { 3508 tmp = mycols[nzcount++]/bs; 3509 if (!mask[tmp]) { 3510 mask[tmp] = 1; 3511 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3512 else masked1[dcount++] = tmp; 3513 } 3514 } 3515 rowcount++; 3516 } 3517 3518 dlens[i] = dcount; 3519 odlens[i] = odcount; 3520 3521 /* zero out the mask elements we set */ 3522 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3523 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3524 } 3525 3526 /* create our matrix */ 3527 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3528 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3529 ierr = MatSetType(A,type);CHKERRQ(ierr); 3530 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3531 3532 if (!rank) { 3533 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3534 /* read in my part of the matrix numerical values */ 3535 nz = procsnz[0]; 3536 vals = buf; 3537 mycols = ibuf; 3538 if (size == 1) nz -= extra_rows; 3539 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3540 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3541 3542 /* insert into matrix */ 3543 jj = rstart*bs; 3544 for (i=0; i<m; i++) { 3545 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3546 mycols += locrowlens[i]; 3547 vals += locrowlens[i]; 3548 jj++; 3549 } 3550 /* read in other processors (except the last one) and ship out */ 3551 for (i=1; i<size-1; i++) { 3552 nz = procsnz[i]; 3553 vals = buf; 3554 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3555 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3556 } 3557 /* the last proc */ 3558 if (size != 1){ 3559 nz = procsnz[i] - extra_rows; 3560 vals = buf; 3561 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3562 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3563 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3564 } 3565 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3566 } else { 3567 /* receive numeric values */ 3568 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3569 3570 /* receive message of values*/ 3571 vals = buf; 3572 mycols = ibuf; 3573 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3574 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3575 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3576 3577 /* insert into matrix */ 3578 jj = rstart*bs; 3579 for (i=0; i<m; i++) { 3580 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3581 mycols += 
locrowlens[i]; 3582 vals += locrowlens[i]; 3583 jj++; 3584 } 3585 } 3586 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3587 ierr = PetscFree(buf);CHKERRQ(ierr); 3588 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3589 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3590 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3591 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3592 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3593 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3594 3595 *newmat = A; 3596 PetscFunctionReturn(0); 3597 } 3598 3599 #undef __FUNCT__ 3600 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3601 /*@ 3602 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3603 3604 Input Parameters: 3605 + mat - the matrix 3606 - fact - factor used to compute the hash table size 3607 3608 Collective on Mat 3609 3610 Level: advanced 3611 3612 Notes: 3613 This can also be set by the command line option: -mat_use_hash_table <fact> 3614 3615 .keywords: matrix, hashtable, factor, HT 3616 3617 .seealso: MatSetOption() 3618 @*/ 3619 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3620 { 3621 PetscErrorCode ierr,(*f)(Mat,PetscReal); 3622 3623 PetscFunctionBegin; 3624 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 3625 if (f) { 3626 ierr = (*f)(mat,fact);CHKERRQ(ierr); 3627 } 3628 PetscFunctionReturn(0); 3629 } 3630 3631 EXTERN_C_BEGIN 3632 #undef __FUNCT__ 3633 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3634 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3635 { 3636 Mat_MPIBAIJ *baij; 3637 3638 PetscFunctionBegin; 3639 baij = (Mat_MPIBAIJ*)mat->data; 3640 baij->ht_fact = fact; 3641 PetscFunctionReturn(0); 3642 } 3643 EXTERN_C_END 3644 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3647 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3648 { 3649 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3650 PetscFunctionBegin; 3651 *Ad = a->A; 3652 *Ao = a->B; 3653 *colmap = a->garray; 3654 PetscFunctionReturn(0); 3655 } 3656 3657 /* 3658 Special version for direct calls from Fortran (to eliminate two function call overheads) 3659 */ 3660 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3661 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3662 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3663 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3664 #endif 3665 3666 #undef __FUNCT__ 3667 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3668 /*@C 3669 MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to MatSetValuesBlocked() 3670 3671 Collective on Mat 3672 3673 Input Parameters: 3674 + mat - the matrix 3675 . min - number of input rows 3676 . im - input rows 3677 . nin - number of input columns 3678 . in - input columns 3679 . v - numerical values input 3680 - addvin - INSERT_VALUES or ADD_VALUES 3681 3682 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
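
   Portable code should simply call MatSetValuesBlocked(); the call below sketches the
   equivalent operation through the normal dispatch path (here m, n, and addv stand for the
   values pointed to by min, nin, and addvin).
.vb
   ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve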
3683 3684 Level: advanced 3685 3686 .seealso: MatSetValuesBlocked() 3687 @*/ 3688 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3689 { 3690 /* convert input arguments to C version */ 3691 Mat mat = *matin; 3692 PetscInt m = *min, n = *nin; 3693 InsertMode addv = *addvin; 3694 3695 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3696 const MatScalar *value; 3697 MatScalar *barray=baij->barray; 3698 PetscTruth roworiented = baij->roworiented; 3699 PetscErrorCode ierr; 3700 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3701 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3702 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3703 3704 PetscFunctionBegin; 3705 /* tasks normally handled by MatSetValuesBlocked() */ 3706 if (mat->insertmode == NOT_SET_VALUES) { 3707 mat->insertmode = addv; 3708 } 3709 #if defined(PETSC_USE_DEBUG) 3710 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3711 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3712 #endif 3713 if (mat->assembled) { 3714 mat->was_assembled = PETSC_TRUE; 3715 mat->assembled = PETSC_FALSE; 3716 } 3717 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3718 3719 3720 if(!barray) { 3721 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3722 baij->barray = barray; 3723 } 3724 3725 if (roworiented) { 3726 stepval = (n-1)*bs; 3727 } else { 3728 stepval = (m-1)*bs; 3729 } 3730 for (i=0; i<m; i++) { 3731 if (im[i] < 0) continue; 3732 #if defined(PETSC_USE_DEBUG) 3733 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3734 #endif 3735 if (im[i] >= rstart && im[i] < rend) { 3736 row = im[i] - rstart; 3737 for (j=0; j<n; j++) { 3738 /* If NumCol = 1 then a copy is not required */ 3739 if ((roworiented) && (n == 1)) { 3740 barray = (MatScalar*)v + i*bs2; 3741 } else if((!roworiented) && (m == 1)) { 3742 barray = (MatScalar*)v + j*bs2; 3743 } else { /* Here a copy is required */ 3744 if (roworiented) { 3745 value = v + i*(stepval+bs)*bs + j*bs; 3746 } else { 3747 value = v + j*(stepval+bs)*bs + i*bs; 3748 } 3749 for (ii=0; ii<bs; ii++,value+=stepval) { 3750 for (jj=0; jj<bs; jj++) { 3751 *barray++ = *value++; 3752 } 3753 } 3754 barray -=bs2; 3755 } 3756 3757 if (in[j] >= cstart && in[j] < cend){ 3758 col = in[j] - cstart; 3759 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3760 } 3761 else if (in[j] < 0) continue; 3762 #if defined(PETSC_USE_DEBUG) 3763 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3764 #endif 3765 else { 3766 if (mat->was_assembled) { 3767 if (!baij->colmap) { 3768 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3769 } 3770 3771 #if defined(PETSC_USE_DEBUG) 3772 #if defined (PETSC_USE_CTABLE) 3773 { PetscInt data; 3774 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3775 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3776 } 3777 #else 3778 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3779 #endif 3780 #endif 3781 #if defined (PETSC_USE_CTABLE) 3782 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3783 col = (col - 1)/bs; 3784 #else 
3785 col = (baij->colmap[in[j]] - 1)/bs; 3786 #endif 3787 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3788 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3789 col = in[j]; 3790 } 3791 } 3792 else col = in[j]; 3793 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3794 } 3795 } 3796 } else { 3797 if (!baij->donotstash) { 3798 if (roworiented) { 3799 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3800 } else { 3801 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3802 } 3803 } 3804 } 3805 } 3806 3807 /* task normally handled by MatSetValuesBlocked() */ 3808 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3809 PetscFunctionReturn(0); 3810 } 3811