#define PETSCMAT_DLL

/*
   MPI parallel BAIJ (block sparse) matrix implementation.  Each process
   stores its local rows as two sequential BAIJ matrices: A (the "diagonal"
   block, columns owned by this process) and B (the "off-diagonal" block,
   whose local block columns map to global block columns via garray[]).
*/
#include "../src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/
#include "petscblaslapack.h"

/* Routines defined in other files of this module */
EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   For each locally owned row, returns in v the entry of maximum absolute
   value and (optionally) in idx[] the global column where it occurs.

   First takes the max over the diagonal block into v/idx directly, then
   scans the off-diagonal block into a temporary vector and keeps the
   per-row winner, translating the off-diagonal local index to a global
   point column through garray[].
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* max over the diagonal block; idx comes back in local column numbering */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift to global column numbering; rows whose max is 0 are left alone */
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  /* max over the off-diagonal block into a scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    /* keep whichever block produced the larger magnitude; translate the
       off-diagonal local block column through garray[] to a global point
       column */
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
/* Saves a copy of the numerical values of both local blocks (see MatStoreValues()) */
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
/* Restores the values previously saved by MatStoreValues_MPIBAIJ() */
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 89 { 90 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 91 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 92 PetscErrorCode ierr; 93 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 94 95 PetscFunctionBegin; 96 #if defined (PETSC_USE_CTABLE) 97 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 98 for (i=0; i<nbs; i++){ 99 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 100 } 101 #else 102 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 103 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 105 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 106 #endif 107 PetscFunctionReturn(0); 108 } 109 110 #define CHUNKSIZE 10 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ;\ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscTruth roworiented = baij->roworiented; 197 
PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa=a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba=b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig){ 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 231 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_DEBUG) 235 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!baij->colmap) { 240 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 244 col = col - 1; 245 #else 246 col = baij->colmap[in[j]/bs] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 249 ierr = 
DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 252 B = baij->B; 253 b = (Mat_SeqBAIJ*)(B)->data; 254 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 255 ba=b->a; 256 } else col += in[j]%bs; 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!baij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscTruth roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if(!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 
308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + i*(stepval+bs)*bs + j*bs; 316 } else { 317 value = v + j*(stepval+bs)*bs + i*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=stepval) { 320 for (jj=0; jj<bs; jj++) { 321 *barray++ = *value++; 322 } 323 } 324 barray -=bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = 
MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscTruth roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && 
(HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscTruth roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if (roworiented) { 483 stepval = 
(n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 
baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 
if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = sqrt(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v 
= bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr) 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else { 722 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 723 } 724 } 725 PetscFunctionReturn(0); 726 } 727 728 /* 729 Creates the hash table, and sets the table 730 This table is created only once. 731 If new entried need to be added to the matrix 732 then the hash table has to be destroyed and 733 recreated. 
 */
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   Builds the (hd, ht) hash table used by the *_HT MatSetValues variants:
   for every existing nonzero block of A and B, key = (global block
   row)*Nbs + (global block col) + 1 is placed by linear probing, and hd[]
   records the address of that block's values.  Table size is factor*nz.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;  /* probe statistics for the summary below */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);  /* table already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: diagonal block, columns shifted by cstart */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1 = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {   /* empty slot found */
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: off-diagonal block, columns via garray[] */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1 = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   Starts assembly: verifies all processes agree on ADD vs INSERT mode and
   launches the (point and block) stash scatters that deliver off-process
   values to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Finishes assembly: drains the incoming point and block stashes into the
   local blocks, assembles A and B, agrees globally on disassembly, builds
   the multiply machinery on first final assembly, and optionally installs
   the hash-table fast path for subsequent MatSetValues calls.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed
       column-oriented, set the roworiented flag to column oriented, and
       after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly (if requested) build the hash table and switch
     the setvalues function pointers to the fast HT variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/* Viewer for ASCII/draw/socket; for size > 1 the matrix is gathered onto
   process 0 (continues beyond this excerpt) */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
             mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqBAIJ *Aloc;
    PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr =
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1057 /* 1058 Everyone has to call to draw the matrix since the graphics waits are 1059 synchronized across all processors that share the PetscDraw object 1060 */ 1061 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1062 if (!rank) { 1063 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1064 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1065 } 1066 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 ierr = MatDestroy(A);CHKERRQ(ierr); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatView_MPIBAIJ" 1074 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1075 { 1076 PetscErrorCode ierr; 1077 PetscTruth iascii,isdraw,issocket,isbinary; 1078 1079 PetscFunctionBegin; 1080 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1081 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1082 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1083 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1084 if (iascii || isdraw || issocket || isbinary) { 1085 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1086 } else { 1087 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1088 } 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1094 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1095 { 1096 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 #if defined(PETSC_USE_LOG) 1101 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1102 #endif 1103 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1104 
ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1105 ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1106 ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1107 #if defined (PETSC_USE_CTABLE) 1108 if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);} 1109 #else 1110 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1111 #endif 1112 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1113 if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1114 if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1115 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1116 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1117 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1118 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1119 ierr = PetscFree(baij);CHKERRQ(ierr); 1120 1121 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatMult_MPIBAIJ" 1134 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1135 { 1136 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 PetscInt nt; 1139 1140 PetscFunctionBegin; 1141 ierr = 
VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1142 if (nt != A->cmap->n) { 1143 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1144 } 1145 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1146 if (nt != A->rmap->n) { 1147 SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1148 } 1149 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1151 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1158 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1159 { 1160 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1166 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1173 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1174 { 1175 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1176 PetscErrorCode ierr; 1177 PetscTruth merged; 1178 1179 PetscFunctionBegin; 1180 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1181 /* do nondiagonal part */ 1182 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1183 if (!merged) { 1184 /* send it on its way */ 1185 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1186 /* do local part */ 1187 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1188 /* receive remote parts: note this assumes the values are not 
actually */ 1189 /* inserted in yy until the next line */ 1190 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 } else { 1192 /* do local part */ 1193 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1194 /* send it on its way */ 1195 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1196 /* values actually were received in the Begin() but we need to call this nop */ 1197 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1198 } 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1204 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1205 { 1206 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 /* do nondiagonal part */ 1211 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1212 /* send it on its way */ 1213 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 /* do local part */ 1215 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1216 /* receive remote parts: note this assumes the values are not actually */ 1217 /* inserted in yy until the next line, which is true for my implementation*/ 1218 /* but is not perhaps always true. 
*/ 1219 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* 1224 This only works correctly for square matrices where the subblock A->A is the 1225 diagonal block 1226 */ 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1229 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1230 { 1231 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1236 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatScale_MPIBAIJ" 1242 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1243 { 1244 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1249 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1255 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1256 { 1257 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1258 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1259 PetscErrorCode ierr; 1260 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1261 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1262 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1263 1264 PetscFunctionBegin; 1265 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1266 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1267 mat->getrowactive = PETSC_TRUE; 1268 1269 if (!mat->rowvalues && (idx || v)) { 1270 /* 1271 allocate enough space to hold information from the longest row. 
1272 */ 1273 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1274 PetscInt max = 1,mbs = mat->mbs,tmp; 1275 for (i=0; i<mbs; i++) { 1276 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1277 if (max < tmp) { max = tmp; } 1278 } 1279 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1280 } 1281 lrow = row - brstart; 1282 1283 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1284 if (!v) {pvA = 0; pvB = 0;} 1285 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1286 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1287 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1288 nztot = nzA + nzB; 1289 1290 cmap = mat->garray; 1291 if (v || idx) { 1292 if (nztot) { 1293 /* Sort by increasing column numbers, assuming A and B already sorted */ 1294 PetscInt imark = -1; 1295 if (v) { 1296 *v = v_p = mat->rowvalues; 1297 for (i=0; i<nzB; i++) { 1298 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1299 else break; 1300 } 1301 imark = i; 1302 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1303 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1304 } 1305 if (idx) { 1306 *idx = idx_p = mat->rowindices; 1307 if (imark > -1) { 1308 for (i=0; i<imark; i++) { 1309 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1310 } 1311 } else { 1312 for (i=0; i<nzB; i++) { 1313 if (cmap[cworkB[i]/bs] < cstart) 1314 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1315 else break; 1316 } 1317 imark = i; 1318 } 1319 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1320 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1321 } 1322 } else { 1323 if (idx) *idx = 0; 1324 if (v) *v = 0; 1325 } 1326 } 1327 *nz = nztot; 1328 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1329 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
/*
   MatRestoreRow_MPIBAIJ - Companion to MatGetRow_MPIBAIJ().  The row work
   arrays are owned by the matrix and reused, so nothing is freed here; only
   the "a row is checked out" flag is cleared.
*/
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
/* Zeroes all entries by zeroing both the diagonal (A) and off-diagonal (B) blocks. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
/*
   MatGetInfo_MPIBAIJ - Sums the statistics of the local A and B blocks, then
   combines them across the communicator according to flag: MAT_LOCAL returns
   the per-process totals, MAT_GLOBAL_MAX reduces with MPI_MAX and
   MAT_GLOBAL_SUM with MPI_SUM.  Factorization fields are zeroed (no parallel
   LU/ILU/Cholesky here).
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];   /* nz_used,nz_allocated,nz_unneeded,memory,mallocs */

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  /* accumulate the off-diagonal block on top of the diagonal block's numbers */
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   MatSetOption_MPIBAIJ - Records an option on the parallel matrix, forwarding
   to the sequential A/B blocks where the option is per-block.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-structure options apply to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  /* symmetry-type options are recorded on the diagonal block only */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__ 1450 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1451 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1452 { 1453 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1454 Mat_SeqBAIJ *Aloc; 1455 Mat B; 1456 PetscErrorCode ierr; 1457 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1458 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1459 MatScalar *a; 1460 1461 PetscFunctionBegin; 1462 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1463 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1464 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1465 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1466 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1467 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1468 } else { 1469 B = *matout; 1470 } 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1474 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1476 1477 for (i=0; i<mbs; i++) { 1478 rvals[0] = bs*(baij->rstartbs + i); 1479 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1480 for (j=ai[i]; j<ai[i+1]; j++) { 1481 col = (baij->cstartbs+aj[j])*bs; 1482 for (k=0; k<bs; k++) { 1483 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1484 col++; a += bs; 1485 } 1486 } 1487 } 1488 /* copy over the B part */ 1489 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1490 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1491 for (i=0; i<mbs; i++) { 1492 rvals[0] = bs*(baij->rstartbs + i); 1493 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1494 for (j=ai[i]; j<ai[i+1]; j++) { 1495 col = baij->garray[aj[j]]*bs; 1496 for (k=0; k<bs; k++) { 1497 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1498 col++; a += bs; 1499 } 1500 } 1501 } 1502 ierr = PetscFree(rvals);CHKERRQ(ierr); 
1503 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1504 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1505 1506 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1507 *matout = B; 1508 } else { 1509 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1516 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1517 { 1518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1519 Mat a = baij->A,b = baij->B; 1520 PetscErrorCode ierr; 1521 PetscInt s1,s2,s3; 1522 1523 PetscFunctionBegin; 1524 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1525 if (rr) { 1526 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1527 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1528 /* Overlap communication with computation. */ 1529 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 } 1531 if (ll) { 1532 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1533 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1534 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1535 } 1536 /* scale the diagonal block */ 1537 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1538 1539 if (rr) { 1540 /* Do a scatter end and then right scale the off-diagonal block */ 1541 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1542 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1543 } 1544 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1550 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 1551 { 1552 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1553 PetscErrorCode ierr; 1554 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1555 PetscInt i,*owners = A->rmap->range; 1556 PetscInt 
*nprocs,j,idx,nsends,row; 1557 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1558 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1559 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1560 MPI_Comm comm = ((PetscObject)A)->comm; 1561 MPI_Request *send_waits,*recv_waits; 1562 MPI_Status recv_status,*send_status; 1563 #if defined(PETSC_DEBUG) 1564 PetscTruth found = PETSC_FALSE; 1565 #endif 1566 1567 PetscFunctionBegin; 1568 /* first count number of contributors to each processor */ 1569 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1570 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1571 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1572 j = 0; 1573 for (i=0; i<N; i++) { 1574 if (lastidx > (idx = rows[i])) j = 0; 1575 lastidx = idx; 1576 for (; j<size; j++) { 1577 if (idx >= owners[j] && idx < owners[j+1]) { 1578 nprocs[2*j]++; 1579 nprocs[2*j+1] = 1; 1580 owner[i] = j; 1581 #if defined(PETSC_DEBUG) 1582 found = PETSC_TRUE; 1583 #endif 1584 break; 1585 } 1586 } 1587 #if defined(PETSC_DEBUG) 1588 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1589 found = PETSC_FALSE; 1590 #endif 1591 } 1592 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1593 1594 /* inform other processors of number of messages and max length*/ 1595 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1596 1597 /* post receives: */ 1598 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1599 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1600 for (i=0; i<nrecvs; i++) { 1601 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1602 } 1603 1604 /* do sends: 1605 1) starts[i] gives the starting index in svalues for stuff going to 1606 the ith processor 1607 */ 1608 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1609 
ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1610 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1611 starts[0] = 0; 1612 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1613 for (i=0; i<N; i++) { 1614 svalues[starts[owner[i]]++] = rows[i]; 1615 } 1616 1617 starts[0] = 0; 1618 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1619 count = 0; 1620 for (i=0; i<size; i++) { 1621 if (nprocs[2*i+1]) { 1622 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1623 } 1624 } 1625 ierr = PetscFree(starts);CHKERRQ(ierr); 1626 1627 base = owners[rank]; 1628 1629 /* wait on receives */ 1630 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1631 count = nrecvs; 1632 slen = 0; 1633 while (count) { 1634 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1635 /* unpack receives into our local space */ 1636 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1637 source[imdex] = recv_status.MPI_SOURCE; 1638 lens[imdex] = n; 1639 slen += n; 1640 count--; 1641 } 1642 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1643 1644 /* move the data into the send scatter */ 1645 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1646 count = 0; 1647 for (i=0; i<nrecvs; i++) { 1648 values = rvalues + i*nmax; 1649 for (j=0; j<lens[i]; j++) { 1650 lrows[count++] = values[j] - base; 1651 } 1652 } 1653 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1654 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1655 ierr = PetscFree(owner);CHKERRQ(ierr); 1656 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1657 1658 /* actually zap the local rows */ 1659 /* 1660 Zero the required rows. 
If the "diagonal block" of the matrix 1661 is square and the user wishes to set the diagonal we use separate 1662 code so that MatSetValues() is not called for each diagonal allocating 1663 new memory, thus calling lots of mallocs and slowing things down. 1664 1665 */ 1666 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1667 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr); 1668 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1669 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr); 1670 } else if (diag != 0.0) { 1671 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1672 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1673 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1674 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1675 } 1676 for (i=0; i<slen; i++) { 1677 row = lrows[i] + rstart_bs; 1678 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1679 } 1680 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 } else { 1683 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr); 1684 } 1685 1686 ierr = PetscFree(lrows);CHKERRQ(ierr); 1687 1688 /* wait on sends */ 1689 if (nsends) { 1690 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1691 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1692 ierr = PetscFree(send_status);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1695 ierr = PetscFree(svalues);CHKERRQ(ierr); 1696 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1702 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1703 { 1704 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1705 PetscErrorCode ierr; 1706 1707 PetscFunctionBegin; 1708 ierr = 
MatSetUnfactored(a->A);CHKERRQ(ierr); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatEqual_MPIBAIJ" 1716 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1717 { 1718 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1719 Mat a,b,c,d; 1720 PetscTruth flg; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 a = matA->A; b = matA->B; 1725 c = matB->A; d = matB->B; 1726 1727 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1728 if (flg) { 1729 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1730 } 1731 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatCopy_MPIBAIJ" 1737 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1738 { 1739 PetscErrorCode ierr; 1740 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1741 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1742 1743 PetscFunctionBegin; 1744 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1745 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1746 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1747 } else { 1748 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1749 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1756 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1757 { 1758 PetscErrorCode ierr; 1759 1760 PetscFunctionBegin; 1761 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1767 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1768 { 1769 PetscErrorCode ierr; 1770 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1771 PetscBLASInt bnz,one=1; 1772 Mat_SeqBAIJ *x,*y; 1773 1774 PetscFunctionBegin; 1775 if (str == SAME_NONZERO_PATTERN) { 1776 PetscScalar alpha = a; 1777 x = (Mat_SeqBAIJ *)xx->A->data; 1778 y = (Mat_SeqBAIJ *)yy->A->data; 1779 bnz = PetscBLASIntCast(x->nz); 1780 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1781 x = (Mat_SeqBAIJ *)xx->B->data; 1782 y = (Mat_SeqBAIJ *)yy->B->data; 1783 bnz = PetscBLASIntCast(x->nz); 1784 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1785 } else { 1786 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1787 } 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #undef __FUNCT__ 1792 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1793 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1796 PetscInt rbs,cbs; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1801 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1802 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 1803 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 1804 if (rbs != bs) 
SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 1805 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1811 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1812 { 1813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1818 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1824 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1825 { 1826 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1831 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1837 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1838 { 1839 PetscErrorCode ierr; 1840 IS iscol_local; 1841 PetscInt csize; 1842 1843 PetscFunctionBegin; 1844 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1845 if (call == MAT_REUSE_MATRIX) { 1846 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1847 if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1848 } else { 1849 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1850 } 1851 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1852 if (call == MAT_INITIAL_MATRIX) { 1853 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1854 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define 
__FUNCT__ "MatGetSubMatrix_MPIBAIJ" 1861 /* 1862 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1863 in local and then by concatenating the local matrices the end result. 1864 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 1865 */ 1866 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1867 { 1868 PetscErrorCode ierr; 1869 PetscMPIInt rank,size; 1870 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1871 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1872 Mat *local,M,Mreuse; 1873 MatScalar *vwork,*aa; 1874 MPI_Comm comm = ((PetscObject)mat)->comm; 1875 Mat_SeqBAIJ *aij; 1876 1877 1878 PetscFunctionBegin; 1879 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1881 1882 if (call == MAT_REUSE_MATRIX) { 1883 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1884 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1885 local = &Mreuse; 1886 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1887 } else { 1888 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1889 Mreuse = *local; 1890 ierr = PetscFree(local);CHKERRQ(ierr); 1891 } 1892 1893 /* 1894 m - number of local rows 1895 n - number of columns (same on all processors) 1896 rstart - first row in new global matrix generated 1897 */ 1898 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1899 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1900 m = m/bs; 1901 n = n/bs; 1902 1903 if (call == MAT_INITIAL_MATRIX) { 1904 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1905 ii = aij->i; 1906 jj = aij->j; 1907 1908 /* 1909 Determine the number of non-zeros in the diagonal and off-diagonal 1910 portions of the matrix in order to do correct preallocation 1911 */ 1912 1913 /* first get start and end of 
"diagonal" columns */ 1914 if (csize == PETSC_DECIDE) { 1915 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 1916 if (mglobal == n*bs) { /* square matrix */ 1917 nlocal = m; 1918 } else { 1919 nlocal = n/size + ((n % size) > rank); 1920 } 1921 } else { 1922 nlocal = csize/bs; 1923 } 1924 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1925 rstart = rend - nlocal; 1926 if (rank == size - 1 && rend != n) { 1927 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 1928 } 1929 1930 /* next, compute all the lengths */ 1931 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 1932 olens = dlens + m; 1933 for (i=0; i<m; i++) { 1934 jend = ii[i+1] - ii[i]; 1935 olen = 0; 1936 dlen = 0; 1937 for (j=0; j<jend; j++) { 1938 if (*jj < rstart || *jj >= rend) olen++; 1939 else dlen++; 1940 jj++; 1941 } 1942 olens[i] = olen; 1943 dlens[i] = dlen; 1944 } 1945 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 1946 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 1947 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 1948 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 1949 ierr = PetscFree(dlens);CHKERRQ(ierr); 1950 } else { 1951 PetscInt ml,nl; 1952 1953 M = *newmat; 1954 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1955 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1956 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1957 /* 1958 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1959 rather than the slower MatSetValues(). 
1960 */ 1961 M->was_assembled = PETSC_TRUE; 1962 M->assembled = PETSC_FALSE; 1963 } 1964 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1965 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1966 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1967 ii = aij->i; 1968 jj = aij->j; 1969 aa = aij->a; 1970 for (i=0; i<m; i++) { 1971 row = rstart/bs + i; 1972 nz = ii[i+1] - ii[i]; 1973 cwork = jj; jj += nz; 1974 vwork = aa; aa += nz; 1975 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1976 } 1977 1978 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 *newmat = M; 1981 1982 /* save submatrix used in processor for next request */ 1983 if (call == MAT_INITIAL_MATRIX) { 1984 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1985 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 1986 } 1987 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatPermute_MPIBAIJ" 1993 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1994 { 1995 MPI_Comm comm,pcomm; 1996 PetscInt first,local_size,nrows; 1997 const PetscInt *rows; 1998 PetscMPIInt size; 1999 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2004 /* make a collective version of 'rowp' */ 2005 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2006 if (pcomm==comm) { 2007 crowp = rowp; 2008 } else { 2009 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2010 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2011 ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr); 2012 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2013 } 2014 /* collect the global row permutation and invert it */ 2015 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2016 ierr = 
ISSetPermutation(growp);CHKERRQ(ierr); 2017 if (pcomm!=comm) { 2018 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2019 } 2020 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2021 /* get the local target indices */ 2022 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2023 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2024 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2025 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2027 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2028 /* the column permutation is so much easier; 2029 make a local version of 'colp' and invert it */ 2030 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2031 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2032 if (size==1) { 2033 lcolp = colp; 2034 } else { 2035 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2036 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2037 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr); 2038 } 2039 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2040 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2041 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2042 if (size>1) { 2043 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2044 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2045 } 2046 /* now we just get the submatrix */ 2047 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2048 /* clean up */ 2049 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2050 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2056 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2057 { 2058 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2060 2061 PetscFunctionBegin; 2062 if 
(nghosts) { *nghosts = B->nbs;} 2063 if (ghosts) {*ghosts = baij->garray;} 2064 PetscFunctionReturn(0); 2065 } 2066 2067 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2071 /* 2072 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2073 */ 2074 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2075 { 2076 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2077 PetscErrorCode ierr; 2078 PetscMPIInt size,*ncolsonproc,*disp,nn; 2079 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2080 const PetscInt *is; 2081 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2082 PetscInt *rowhit,M,cstart,cend,colb; 2083 PetscInt *columnsforrow,l; 2084 IS *isa; 2085 PetscTruth done,flg; 2086 ISLocalToGlobalMapping map = mat->bmapping; 2087 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2088 2089 PetscFunctionBegin; 2090 if (!mat->assembled) { 2091 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2092 } 2093 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2094 2095 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2096 2097 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2098 M = mat->rmap->n/bs; 2099 cstart = mat->cmap->rstart/bs; 2100 cend = mat->cmap->rend/bs; 2101 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2102 c->N = mat->cmap->N/bs; 2103 c->m = mat->rmap->n/bs; 2104 c->rstart = mat->rmap->rstart/bs; 2105 2106 c->ncolors = nis; 2107 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2108 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2109 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2110 ierr = 
PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2111 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2113 2114 /* Allow access to data structures of local part of matrix */ 2115 if (!baij->colmap) { 2116 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2117 } 2118 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2119 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2120 2121 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2122 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2123 2124 for (i=0; i<nis; i++) { 2125 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2126 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2127 c->ncolumns[i] = n; 2128 if (n) { 2129 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2130 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2131 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2132 } else { 2133 c->columns[i] = 0; 2134 } 2135 2136 if (ctype == IS_COLORING_GLOBAL){ 2137 /* Determine the total (parallel) number of columns of this color */ 2138 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2139 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2140 2141 nn = PetscMPIIntCast(n); 2142 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2143 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2144 if (!nctot) { 2145 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2146 } 2147 2148 disp[0] = 0; 2149 for (j=1; j<size; j++) { 2150 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2151 } 2152 2153 /* Get complete list of columns for color on each processor */ 2154 ierr 
= PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2155 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2156 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2157 } else if (ctype == IS_COLORING_GHOSTED){ 2158 /* Determine local number of columns of this color on this process, including ghost points */ 2159 nctot = n; 2160 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2161 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2162 } else { 2163 SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2164 } 2165 2166 /* 2167 Mark all rows affect by these columns 2168 */ 2169 /* Temporary option to allow for debugging/testing */ 2170 flg = PETSC_FALSE; 2171 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2172 if (!flg) {/*-----------------------------------------------------------------------------*/ 2173 /* crude, fast version */ 2174 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2175 /* loop over columns*/ 2176 for (j=0; j<nctot; j++) { 2177 if (ctype == IS_COLORING_GHOSTED) { 2178 col = ltog[cols[j]]; 2179 } else { 2180 col = cols[j]; 2181 } 2182 if (col >= cstart && col < cend) { 2183 /* column is in diagonal block of matrix */ 2184 rows = A_cj + A_ci[col-cstart]; 2185 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2186 } else { 2187 #if defined (PETSC_USE_CTABLE) 2188 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr) 2189 colb --; 2190 #else 2191 colb = baij->colmap[col] - 1; 2192 #endif 2193 if (colb == -1) { 2194 m = 0; 2195 } else { 2196 colb = colb/bs; 2197 rows = B_cj + B_ci[colb]; 2198 m = B_ci[colb+1] - B_ci[colb]; 2199 } 2200 } 2201 /* loop over columns marking them in rowhit */ 2202 for (k=0; k<m; k++) { 2203 rowhit[*rows++] = col + 1; 2204 } 2205 } 2206 2207 /* count the number of hits */ 2208 nrows = 0; 2209 for (j=0; j<M; j++) { 2210 if (rowhit[j]) nrows++; 2211 
} 2212 c->nrows[i] = nrows; 2213 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2214 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2215 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2216 nrows = 0; 2217 for (j=0; j<M; j++) { 2218 if (rowhit[j]) { 2219 c->rows[i][nrows] = j; 2220 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2221 nrows++; 2222 } 2223 } 2224 } else {/*-------------------------------------------------------------------------------*/ 2225 /* slow version, using rowhit as a linked list */ 2226 PetscInt currentcol,fm,mfm; 2227 rowhit[M] = M; 2228 nrows = 0; 2229 /* loop over columns*/ 2230 for (j=0; j<nctot; j++) { 2231 if (ctype == IS_COLORING_GHOSTED) { 2232 col = ltog[cols[j]]; 2233 } else { 2234 col = cols[j]; 2235 } 2236 if (col >= cstart && col < cend) { 2237 /* column is in diagonal block of matrix */ 2238 rows = A_cj + A_ci[col-cstart]; 2239 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2240 } else { 2241 #if defined (PETSC_USE_CTABLE) 2242 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2243 colb --; 2244 #else 2245 colb = baij->colmap[col] - 1; 2246 #endif 2247 if (colb == -1) { 2248 m = 0; 2249 } else { 2250 colb = colb/bs; 2251 rows = B_cj + B_ci[colb]; 2252 m = B_ci[colb+1] - B_ci[colb]; 2253 } 2254 } 2255 2256 /* loop over columns marking them in rowhit */ 2257 fm = M; /* fm points to first entry in linked list */ 2258 for (k=0; k<m; k++) { 2259 currentcol = *rows++; 2260 /* is it already in the list? 
*/ 2261 do { 2262 mfm = fm; 2263 fm = rowhit[fm]; 2264 } while (fm < currentcol); 2265 /* not in list so add it */ 2266 if (fm != currentcol) { 2267 nrows++; 2268 columnsforrow[currentcol] = col; 2269 /* next three lines insert new entry into linked list */ 2270 rowhit[mfm] = currentcol; 2271 rowhit[currentcol] = fm; 2272 fm = currentcol; 2273 /* fm points to present position in list since we know the columns are sorted */ 2274 } else { 2275 SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2276 } 2277 } 2278 } 2279 c->nrows[i] = nrows; 2280 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2281 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2282 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2283 /* now store the linked list of rows into c->rows[i] */ 2284 nrows = 0; 2285 fm = rowhit[M]; 2286 do { 2287 c->rows[i][nrows] = fm; 2288 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2289 fm = rowhit[fm]; 2290 } while (fm < M); 2291 } /* ---------------------------------------------------------------------------------------*/ 2292 ierr = PetscFree(cols);CHKERRQ(ierr); 2293 } 2294 2295 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2296 /* 2297 vscale will contain the "diagonal" on processor scalings followed by the off processor 2298 */ 2299 if (ctype == IS_COLORING_GLOBAL) { 2300 PetscInt *garray; 2301 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2302 for (i=0; i<baij->B->cmap->n/bs; i++) { 2303 for (j=0; j<bs; j++) { 2304 garray[i*bs+j] = bs*baij->garray[i]+j; 2305 } 2306 } 2307 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2308 ierr = PetscFree(garray);CHKERRQ(ierr); 2309 CHKMEMQ; 2310 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2311 for (k=0; k<c->ncolors; k++) { 2312 ierr = 
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2313 for (l=0; l<c->nrows[k]; l++) { 2314 col = c->columnsforrow[k][l]; 2315 if (col >= cstart && col < cend) { 2316 /* column is in diagonal block of matrix */ 2317 colb = col - cstart; 2318 } else { 2319 /* column is in "off-processor" part */ 2320 #if defined (PETSC_USE_CTABLE) 2321 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2322 colb --; 2323 #else 2324 colb = baij->colmap[col] - 1; 2325 #endif 2326 colb = colb/bs; 2327 colb += cend - cstart; 2328 } 2329 c->vscaleforrow[k][l] = colb; 2330 } 2331 } 2332 } else if (ctype == IS_COLORING_GHOSTED) { 2333 /* Get gtol mapping */ 2334 PetscInt N = mat->cmap->N, *gtol; 2335 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2336 for (i=0; i<N; i++) gtol[i] = -1; 2337 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2338 2339 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2340 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2341 for (k=0; k<c->ncolors; k++) { 2342 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2343 for (l=0; l<c->nrows[k]; l++) { 2344 col = c->columnsforrow[k][l]; /* global column index */ 2345 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2346 } 2347 } 2348 ierr = PetscFree(gtol);CHKERRQ(ierr); 2349 } 2350 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2351 2352 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2353 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2354 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2355 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2356 CHKMEMQ; 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2362 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2363 { 2364 Mat B; 
2365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2366 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2367 Mat_SeqAIJ *b; 2368 PetscErrorCode ierr; 2369 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2370 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2371 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2372 2373 PetscFunctionBegin; 2374 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2375 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2376 2377 /* ---------------------------------------------------------------- 2378 Tell every processor the number of nonzeros per row 2379 */ 2380 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2381 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2382 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2383 } 2384 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2385 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2386 displs = recvcounts + size; 2387 for (i=0; i<size; i++) { 2388 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2389 displs[i] = A->rmap->range[i]/bs; 2390 } 2391 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2392 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2393 #else 2394 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2395 #endif 2396 /* --------------------------------------------------------------- 2397 Create the sequential matrix of the same type as the local block diagonal 2398 */ 2399 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2400 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2401 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 
2402 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2403 b = (Mat_SeqAIJ *)B->data; 2404 2405 /*-------------------------------------------------------------------- 2406 Copy my part of matrix column indices over 2407 */ 2408 sendcount = ad->nz + bd->nz; 2409 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2410 a_jsendbuf = ad->j; 2411 b_jsendbuf = bd->j; 2412 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2413 cnt = 0; 2414 for (i=0; i<n; i++) { 2415 2416 /* put in lower diagonal portion */ 2417 m = bd->i[i+1] - bd->i[i]; 2418 while (m > 0) { 2419 /* is it above diagonal (in bd (compressed) numbering) */ 2420 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2421 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2422 m--; 2423 } 2424 2425 /* put in diagonal portion */ 2426 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2427 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2428 } 2429 2430 /* put in upper diagonal portion */ 2431 while (m-- > 0) { 2432 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2433 } 2434 } 2435 if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2436 2437 /*-------------------------------------------------------------------- 2438 Gather all column indices to all processors 2439 */ 2440 for (i=0; i<size; i++) { 2441 recvcounts[i] = 0; 2442 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2443 recvcounts[i] += lens[j]; 2444 } 2445 } 2446 displs[0] = 0; 2447 for (i=1; i<size; i++) { 2448 displs[i] = displs[i-1] + recvcounts[i-1]; 2449 } 2450 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2451 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2452 #else 2453 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2454 #endif 2455 /*-------------------------------------------------------------------- 2456 Assemble the matrix into useable form (note 
numerical values not yet set) 2457 */ 2458 /* set the b->ilen (length of each row) values */ 2459 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2460 /* set the b->i indices */ 2461 b->i[0] = 0; 2462 for (i=1; i<=A->rmap->N/bs; i++) { 2463 b->i[i] = b->i[i-1] + lens[i-1]; 2464 } 2465 ierr = PetscFree(lens);CHKERRQ(ierr); 2466 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2468 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2469 2470 if (A->symmetric){ 2471 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2472 } else if (A->hermitian) { 2473 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2474 } else if (A->structurally_symmetric) { 2475 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2476 } 2477 *newmat = B; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatSOR_MPIBAIJ" 2483 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2484 { 2485 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2486 PetscErrorCode ierr; 2487 Vec bb1 = 0; 2488 2489 PetscFunctionBegin; 2490 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2491 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2492 } 2493 2494 if (flag == SOR_APPLY_UPPER) { 2495 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2500 if (flag & SOR_ZERO_INITIAL_GUESS) { 2501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2502 its--; 2503 } 2504 2505 while (its--) { 2506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 2509 /* update rhs: 
bb1 = bb - B*x */ 2510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2512 2513 /* local sweep */ 2514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2515 } 2516 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2517 if (flag & SOR_ZERO_INITIAL_GUESS) { 2518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2519 its--; 2520 } 2521 while (its--) { 2522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 2525 /* update rhs: bb1 = bb - B*x */ 2526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2528 2529 /* local sweep */ 2530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2531 } 2532 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2533 if (flag & SOR_ZERO_INITIAL_GUESS) { 2534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2535 its--; 2536 } 2537 while (its--) { 2538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2540 2541 /* update rhs: bb1 = bb - B*x */ 2542 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2543 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2544 2545 /* local sweep */ 2546 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2547 } 2548 } else { 2549 SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2550 } 2551 2552 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2553 PetscFunctionReturn(0); 2554 } 2555 2556 extern PetscErrorCode PETSCMAT_DLLEXPORT 
MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = { 2561 MatSetValues_MPIBAIJ, 2562 MatGetRow_MPIBAIJ, 2563 MatRestoreRow_MPIBAIJ, 2564 MatMult_MPIBAIJ, 2565 /* 4*/ MatMultAdd_MPIBAIJ, 2566 MatMultTranspose_MPIBAIJ, 2567 MatMultTransposeAdd_MPIBAIJ, 2568 0, 2569 0, 2570 0, 2571 /*10*/ 0, 2572 0, 2573 0, 2574 MatSOR_MPIBAIJ, 2575 MatTranspose_MPIBAIJ, 2576 /*15*/ MatGetInfo_MPIBAIJ, 2577 MatEqual_MPIBAIJ, 2578 MatGetDiagonal_MPIBAIJ, 2579 MatDiagonalScale_MPIBAIJ, 2580 MatNorm_MPIBAIJ, 2581 /*20*/ MatAssemblyBegin_MPIBAIJ, 2582 MatAssemblyEnd_MPIBAIJ, 2583 MatSetOption_MPIBAIJ, 2584 MatZeroEntries_MPIBAIJ, 2585 /*24*/ MatZeroRows_MPIBAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*34*/ MatDuplicate_MPIBAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*39*/ MatAXPY_MPIBAIJ, 2601 MatGetSubMatrices_MPIBAIJ, 2602 MatIncreaseOverlap_MPIBAIJ, 2603 MatGetValues_MPIBAIJ, 2604 MatCopy_MPIBAIJ, 2605 /*44*/ 0, 2606 MatScale_MPIBAIJ, 2607 0, 2608 0, 2609 0, 2610 /*49*/ MatSetBlockSize_MPIBAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*54*/ MatFDColoringCreate_MPIBAIJ, 2616 0, 2617 MatSetUnfactored_MPIBAIJ, 2618 MatPermute_MPIBAIJ, 2619 MatSetValuesBlocked_MPIBAIJ, 2620 /*59*/ MatGetSubMatrix_MPIBAIJ, 2621 MatDestroy_MPIBAIJ, 2622 MatView_MPIBAIJ, 2623 0, 2624 0, 2625 /*64*/ 0, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*74*/ 0, 2636 MatFDColoringApply_BAIJ, 2637 0, 2638 0, 2639 0, 2640 /*79*/ 0, 2641 0, 2642 0, 2643 0, 2644 MatLoad_MPIBAIJ, 2645 /*84*/ 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*89*/ 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*94*/ 0, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*99*/ 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*104*/0, 2666 MatRealPart_MPIBAIJ, 2667 MatImaginaryPart_MPIBAIJ, 2668 0, 2669 
0, 2670 /*109*/0, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2676 0, 2677 MatGetGhosts_MPIBAIJ 2678 }; 2679 2680 EXTERN_C_BEGIN 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2684 { 2685 PetscFunctionBegin; 2686 *a = ((Mat_MPIBAIJ *)A->data)->A; 2687 *iscopy = PETSC_FALSE; 2688 PetscFunctionReturn(0); 2689 } 2690 EXTERN_C_END 2691 2692 EXTERN_C_BEGIN 2693 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2694 EXTERN_C_END 2695 2696 EXTERN_C_BEGIN 2697 #undef __FUNCT__ 2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2700 { 2701 PetscInt m,rstart,cstart,cend; 2702 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2703 const PetscInt *JJ=0; 2704 PetscScalar *values=0; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 2709 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2710 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2711 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2714 m = B->rmap->n/bs; 2715 rstart = B->rmap->rstart/bs; 2716 cstart = B->cmap->rstart/bs; 2717 cend = B->cmap->rend/bs; 2718 2719 if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2720 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2721 for (i=0; i<m; i++) { 2722 nz = ii[i+1] - ii[i]; 2723 if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2724 nz_max = PetscMax(nz_max,nz); 2725 JJ = jj + ii[i]; 2726 for (j=0; j<nz; 
j++) { 2727 if (*JJ >= cstart) break; 2728 JJ++; 2729 } 2730 d = 0; 2731 for (; j<nz; j++) { 2732 if (*JJ++ >= cend) break; 2733 d++; 2734 } 2735 d_nnz[i] = d; 2736 o_nnz[i] = nz - d; 2737 } 2738 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2739 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt row = i + rstart; 2748 PetscInt ncols = ii[i+1] - ii[i]; 2749 const PetscInt *icols = jj + ii[i]; 2750 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2751 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 2754 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2755 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 2758 PetscFunctionReturn(0); 2759 } 2760 EXTERN_C_END 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2764 /*@C 2765 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2766 (the default parallel PETSc format). 2767 2768 Collective on MPI_Comm 2769 2770 Input Parameters: 2771 + A - the matrix 2772 . i - the indices into j for the start of each local row (starts with zero) 2773 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2774 - v - optional values in the matrix 2775 2776 Level: developer 2777 2778 .keywords: matrix, aij, compressed row, sparse, parallel 2779 2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2781 @*/ 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2783 { 2784 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 2785 2786 PetscFunctionBegin; 2787 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2788 if (f) { 2789 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 EXTERN_C_BEGIN 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2798 { 2799 Mat_MPIBAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscInt i, newbs = PetscAbs(bs); 2802 2803 PetscFunctionBegin; 2804 if (bs < 0) { 2805 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 2806 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2807 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2808 bs = PetscAbs(bs); 2809 } 2810 if ((d_nnz || o_nnz) && newbs != bs) { 2811 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 2812 } 2813 bs = newbs; 2814 2815 2816 if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 2817 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2818 if 
(o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2819 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2820 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2821 2822 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2823 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 2827 if (d_nnz) { 2828 for (i=0; i<B->rmap->n/bs; i++) { 2829 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2830 } 2831 } 2832 if (o_nnz) { 2833 for (i=0; i<B->rmap->n/bs; i++) { 2834 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2835 } 2836 } 2837 2838 b = (Mat_MPIBAIJ*)B->data; 2839 b->bs2 = bs*bs; 2840 b->mbs = B->rmap->n/bs; 2841 b->nbs = B->cmap->n/bs; 2842 b->Mbs = B->rmap->N/bs; 2843 b->Nbs = B->cmap->N/bs; 2844 2845 for (i=0; i<=b->size; i++) { 2846 b->rangebs[i] = B->rmap->range[i]/bs; 2847 } 2848 b->rstartbs = B->rmap->rstart/bs; 2849 b->rendbs = B->rmap->rend/bs; 2850 b->cstartbs = B->cmap->rstart/bs; 2851 b->cendbs = B->cmap->rend/bs; 2852 2853 if (!B->preallocated) { 2854 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2855 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2856 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2858 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2859 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2860 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2861 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2862 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 2863 } 2864 2865 ierr = 
MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2866 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2867 B->preallocated = PETSC_TRUE; 2868 PetscFunctionReturn(0); 2869 } 2870 EXTERN_C_END 2871 2872 EXTERN_C_BEGIN 2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2875 EXTERN_C_END 2876 2877 2878 EXTERN_C_BEGIN 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2881 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 2882 { 2883 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2884 PetscErrorCode ierr; 2885 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2886 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2887 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 2891 ii[0] = 0; 2892 CHKMEMQ; 2893 for (i=0; i<M; i++) { 2894 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2895 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2896 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2897 /* remove one from count of matrix has diagonal */ 2898 for (j=id[i]; j<id[i+1]; j++) { 2899 if (jd[j] == i) {ii[i+1]--;break;} 2900 } 2901 CHKMEMQ; 2902 } 2903 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2904 cnt = 0; 2905 for (i=0; i<M; i++) { 2906 for (j=io[i]; j<io[i+1]; j++) { 2907 if (garray[jo[j]] > rstart) break; 2908 jj[cnt++] = garray[jo[j]]; 2909 CHKMEMQ; 2910 } 2911 for (k=id[i]; k<id[i+1]; k++) { 2912 if (jd[k] != i) { 2913 jj[cnt++] = rstart + jd[k]; 2914 CHKMEMQ; 2915 } 2916 } 2917 for (;j<io[i+1]; j++) { 2918 jj[cnt++] = 
garray[jo[j]]; 2919 CHKMEMQ; 2920 } 2921 } 2922 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 EXTERN_C_END 2926 2927 /*MC 2928 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2929 2930 Options Database Keys: 2931 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2932 . -mat_block_size <bs> - set the blocksize used to store the matrix 2933 - -mat_use_hash_table <fact> 2934 2935 Level: beginner 2936 2937 .seealso: MatCreateMPIBAIJ 2938 M*/ 2939 2940 EXTERN_C_BEGIN 2941 #undef __FUNCT__ 2942 #define __FUNCT__ "MatCreate_MPIBAIJ" 2943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 2944 { 2945 Mat_MPIBAIJ *b; 2946 PetscErrorCode ierr; 2947 PetscTruth flg; 2948 2949 PetscFunctionBegin; 2950 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2951 B->data = (void*)b; 2952 2953 2954 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2955 B->mapping = 0; 2956 B->assembled = PETSC_FALSE; 2957 2958 B->insertmode = NOT_SET_VALUES; 2959 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 2960 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 2961 2962 /* build local table of row and column ownerships */ 2963 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 2964 2965 /* build cache for off array entries formed */ 2966 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 2967 b->donotstash = PETSC_FALSE; 2968 b->colmap = PETSC_NULL; 2969 b->garray = PETSC_NULL; 2970 b->roworiented = PETSC_TRUE; 2971 2972 /* stuff used in block assembly */ 2973 b->barray = 0; 2974 2975 /* stuff used for matrix vector multiply */ 2976 b->lvec = 0; 2977 b->Mvctx = 0; 2978 2979 /* stuff for MatGetRow() */ 2980 b->rowindices = 0; 2981 b->rowvalues = 0; 2982 b->getrowactive = 
PETSC_FALSE; 2983 2984 /* hash table stuff */ 2985 b->ht = 0; 2986 b->hd = 0; 2987 b->ht_size = 0; 2988 b->ht_flag = PETSC_FALSE; 2989 b->ht_fact = 0; 2990 b->ht_total_ct = 0; 2991 b->ht_insert_ct = 0; 2992 2993 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2994 ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2995 if (flg) { 2996 PetscReal fact = 1.39; 2997 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2998 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 2999 if (fact <= 1.0) fact = 1.39; 3000 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3001 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3002 } 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 3005 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3006 "MatConvert_MPIBAIJ_MPIAdj", 3007 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3009 "MatStoreValues_MPIBAIJ", 3010 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3012 "MatRetrieveValues_MPIBAIJ", 3013 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3014 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3015 "MatGetDiagonalBlock_MPIBAIJ", 3016 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3018 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3019 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3021 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3022 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3024 "MatDiagonalScaleLocal_MPIBAIJ", 3025 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3027 "MatSetHashTableFactor_MPIBAIJ", 3028 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3029 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3030 PetscFunctionReturn(0); 3031 } 3032 EXTERN_C_END 3033 3034 /*MC 3035 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3036 3037 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3038 and MATMPIBAIJ otherwise. 3039 3040 Options Database Keys: 3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3042 3043 Level: beginner 3044 3045 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3046 M*/ 3047 3048 EXTERN_C_BEGIN 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatCreate_BAIJ" 3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3052 { 3053 PetscErrorCode ierr; 3054 PetscMPIInt size; 3055 3056 PetscFunctionBegin; 3057 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3058 if (size == 1) { 3059 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3060 } else { 3061 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3062 } 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 #undef __FUNCT__ 3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3069 /*@C 3070 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3071 (block compressed row). For good matrix assembly performance 3072 the user should preallocate the matrix storage by setting the parameters 3073 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3074 performance can be increased by more than a factor of 50. 3075 3076 Collective on Mat 3077 3078 Input Parameters: 3079 + A - the matrix 3080 . bs - size of blockk 3081 . d_nz - number of block nonzeros per block row in diagonal portion of local 3082 submatrix (same for all local rows) 3083 . d_nnz - array containing the number of block nonzeros in the various block rows 3084 of the in diagonal portion of the local (possibly different for each block 3085 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3086 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of nonzeros in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or PETSC_NULL. 3091 3092 If the *_nnz parameter is given then the *_nz parameter is ignored 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> 3097 3098 Notes: 3099 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3100 than it must be used on all processors that share the object for that argument. 3101 3102 Storage Information: 3103 For a square global matrix we define each processor's diagonal portion 3104 to be its local rows and the corresponding columns (a square submatrix); 3105 each processor's off-diagonal portion encompasses the remainder of the 3106 local matrix (a rectangular submatrix). 3107 3108 The user can specify preallocated storage for the diagonal part of 3109 the local submatrix with either d_nz or d_nnz (not both). Set 3110 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3111 memory allocation. Likewise, specify preallocated storage for the 3112 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 
3113 3114 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3115 the figure below we depict these three local rows and all columns (0-11). 3116 3117 .vb 3118 0 1 2 3 4 5 6 7 8 9 10 11 3119 ------------------- 3120 row 3 | o o o d d d o o o o o o 3121 row 4 | o o o d d d o o o o o o 3122 row 5 | o o o d d d o o o o o o 3123 ------------------- 3124 .ve 3125 3126 Thus, any entries in the d locations are stored in the d (diagonal) 3127 submatrix, and any entries in the o locations are stored in the 3128 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3129 stored simply in the MATSEQBAIJ format for compressed row storage. 3130 3131 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3132 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3133 In general, for PDE problems in which most nonzeros are near the diagonal, 3134 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3135 or you will get TERRIBLE performance; see the users' manual chapter on 3136 matrices. 3137 3138 You can call MatGetInfo() to get information on how effective the preallocation was; 3139 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3140 You can also run with the option -info and look for messages with the string 3141 malloc in them to see if additional memory allocation was needed. 
3142 3143 Level: intermediate 3144 3145 .keywords: matrix, block, aij, compressed row, sparse, parallel 3146 3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3148 @*/ 3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3150 { 3151 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 3152 3153 PetscFunctionBegin; 3154 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3155 if (f) { 3156 ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160 3161 #undef __FUNCT__ 3162 #define __FUNCT__ "MatCreateMPIBAIJ" 3163 /*@C 3164 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3165 (block compressed row). For good matrix assembly performance 3166 the user should preallocate the matrix storage by setting the parameters 3167 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3168 performance can be increased by more than a factor of 50. 3169 3170 Collective on MPI_Comm 3171 3172 Input Parameters: 3173 + comm - MPI communicator 3174 . bs - size of blockk 3175 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3176 This value should be the same as the local size used in creating the 3177 y vector for the matrix-vector product y = Ax. 3178 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3179 This value should be the same as the local size used in creating the 3180 x vector for the matrix-vector product y = Ax. 3181 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3182 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3183 . 
d_nz - number of nonzero blocks per block row in diagonal portion of local 3184 submatrix (same for all local rows) 3185 . d_nnz - array containing the number of nonzero blocks in the various block rows 3186 of the in diagonal portion of the local (possibly different for each block 3187 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3188 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3189 submatrix (same for all local rows). 3190 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3191 off-diagonal portion of the local submatrix (possibly different for 3192 each block row) or PETSC_NULL. 3193 3194 Output Parameter: 3195 . A - the matrix 3196 3197 Options Database Keys: 3198 + -mat_block_size - size of the blocks to use 3199 - -mat_use_hash_table <fact> 3200 3201 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3202 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3203 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3204 3205 Notes: 3206 If the *_nnz parameter is given then the *_nz parameter is ignored 3207 3208 A nonzero block is any block that as 1 or more nonzeros in it 3209 3210 The user MUST specify either the local or global matrix dimensions 3211 (possibly both). 3212 3213 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3214 than it must be used on all processors that share the object for that argument. 3215 3216 Storage Information: 3217 For a square global matrix we define each processor's diagonal portion 3218 to be its local rows and the corresponding columns (a square submatrix); 3219 each processor's off-diagonal portion encompasses the remainder of the 3220 local matrix (a rectangular submatrix). 
3221 3222 The user can specify preallocated storage for the diagonal part of 3223 the local submatrix with either d_nz or d_nnz (not both). Set 3224 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3225 memory allocation. Likewise, specify preallocated storage for the 3226 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3227 3228 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3229 the figure below we depict these three local rows and all columns (0-11). 3230 3231 .vb 3232 0 1 2 3 4 5 6 7 8 9 10 11 3233 ------------------- 3234 row 3 | o o o d d d o o o o o o 3235 row 4 | o o o d d d o o o o o o 3236 row 5 | o o o d d d o o o o o o 3237 ------------------- 3238 .ve 3239 3240 Thus, any entries in the d locations are stored in the d (diagonal) 3241 submatrix, and any entries in the o locations are stored in the 3242 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3243 stored simply in the MATSEQBAIJ format for compressed row storage. 3244 3245 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3246 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3247 In general, for PDE problems in which most nonzeros are near the diagonal, 3248 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3249 or you will get TERRIBLE performance; see the users' manual chapter on 3250 matrices. 
3251 3252 Level: intermediate 3253 3254 .keywords: matrix, block, aij, compressed row, sparse, parallel 3255 3256 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3257 @*/ 3258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3259 { 3260 PetscErrorCode ierr; 3261 PetscMPIInt size; 3262 3263 PetscFunctionBegin; 3264 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3265 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3266 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3267 if (size > 1) { 3268 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3269 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3270 } else { 3271 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3272 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3273 } 3274 PetscFunctionReturn(0); 3275 } 3276 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3279 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3280 { 3281 Mat mat; 3282 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3283 PetscErrorCode ierr; 3284 PetscInt len=0; 3285 3286 PetscFunctionBegin; 3287 *newmat = 0; 3288 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3289 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3290 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3291 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3292 3293 mat->factor = matin->factor; 3294 mat->preallocated = PETSC_TRUE; 3295 mat->assembled = PETSC_TRUE; 3296 mat->insertmode = NOT_SET_VALUES; 3297 3298 a = (Mat_MPIBAIJ*)mat->data; 3299 mat->rmap->bs = matin->rmap->bs; 3300 a->bs2 = oldmat->bs2; 3301 a->mbs = 
oldmat->mbs; 3302 a->nbs = oldmat->nbs; 3303 a->Mbs = oldmat->Mbs; 3304 a->Nbs = oldmat->Nbs; 3305 3306 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3307 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3308 3309 a->size = oldmat->size; 3310 a->rank = oldmat->rank; 3311 a->donotstash = oldmat->donotstash; 3312 a->roworiented = oldmat->roworiented; 3313 a->rowindices = 0; 3314 a->rowvalues = 0; 3315 a->getrowactive = PETSC_FALSE; 3316 a->barray = 0; 3317 a->rstartbs = oldmat->rstartbs; 3318 a->rendbs = oldmat->rendbs; 3319 a->cstartbs = oldmat->cstartbs; 3320 a->cendbs = oldmat->cendbs; 3321 3322 /* hash table stuff */ 3323 a->ht = 0; 3324 a->hd = 0; 3325 a->ht_size = 0; 3326 a->ht_flag = oldmat->ht_flag; 3327 a->ht_fact = oldmat->ht_fact; 3328 a->ht_total_ct = 0; 3329 a->ht_insert_ct = 0; 3330 3331 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3332 if (oldmat->colmap) { 3333 #if defined (PETSC_USE_CTABLE) 3334 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3335 #else 3336 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3337 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3339 #endif 3340 } else a->colmap = 0; 3341 3342 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3343 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3344 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3345 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3346 } else a->garray = 0; 3347 3348 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3349 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3350 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3351 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3352 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3353 3354 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3355 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3356 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3357 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3358 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3359 *newmat = mat; 3360 3361 PetscFunctionReturn(0); 3362 } 3363 3364 #undef __FUNCT__ 3365 #define __FUNCT__ "MatLoad_MPIBAIJ" 3366 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat) 3367 { 3368 Mat A; 3369 PetscErrorCode ierr; 3370 int fd; 3371 PetscInt i,nz,j,rstart,rend; 3372 PetscScalar *vals,*buf; 3373 MPI_Comm comm = ((PetscObject)viewer)->comm; 3374 MPI_Status status; 3375 PetscMPIInt rank,size,maxnz; 3376 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3377 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3378 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3379 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3380 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3381 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3382 3383 PetscFunctionBegin; 3384 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3385 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3386 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3387 3388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3390 if (!rank) { 3391 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3392 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3393 if (header[0] != MAT_FILE_COOKIE) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3394 } 3395 3396 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3397 M = header[1]; N = header[2]; 3398 3399 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3400 3401 /* 3402 This code adds extra rows to make sure the number of rows is 3403 divisible by the blocksize 3404 */ 3405 Mbs = M/bs; 3406 extra_rows = bs - M + bs*Mbs; 3407 if (extra_rows == bs) extra_rows = 0; 3408 else Mbs++; 3409 if (extra_rows && !rank) { 3410 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3411 } 3412 3413 /* determine ownership of all rows */ 3414 mbs = Mbs/size + ((Mbs % size) > rank); 3415 m = mbs*bs; 3416 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3417 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3418 3419 /* process 0 needs enough room for process with most rows */ 3420 if (!rank) { 3421 mmax = rowners[1]; 3422 for (i=2; i<size; i++) { 3423 mmax = PetscMax(mmax,rowners[i]); 3424 } 3425 mmax*=bs; 3426 } else mmax = m; 3427 3428 rowners[0] = 0; 3429 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3430 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3431 rstart = rowners[rank]; 3432 rend = rowners[rank+1]; 3433 3434 /* distribute row lengths to all processors */ 3435 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3436 if (!rank) { 3437 mend = m; 3438 if (size == 1) mend = mend - extra_rows; 3439 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3440 for (j=mend; j<m; j++) locrowlens[j] = 1; 3441 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3442 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3443 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3444 for (j=0; j<m; j++) { 3445 procsnz[0] += locrowlens[j]; 3446 } 3447 for (i=1; i<size; i++) { 3448 mend = browners[i+1] - 
browners[i]; 3449 if (i == size-1) mend = mend - extra_rows; 3450 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3451 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3452 /* calculate the number of nonzeros on each processor */ 3453 for (j=0; j<browners[i+1]-browners[i]; j++) { 3454 procsnz[i] += rowlengths[j]; 3455 } 3456 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3457 } 3458 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3459 } else { 3460 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3461 } 3462 3463 if (!rank) { 3464 /* determine max buffer needed and allocate it */ 3465 maxnz = procsnz[0]; 3466 for (i=1; i<size; i++) { 3467 maxnz = PetscMax(maxnz,procsnz[i]); 3468 } 3469 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3470 3471 /* read in my part of the matrix column indices */ 3472 nz = procsnz[0]; 3473 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3474 mycols = ibuf; 3475 if (size == 1) nz -= extra_rows; 3476 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3477 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3478 3479 /* read in every ones (except the last) and ship off */ 3480 for (i=1; i<size-1; i++) { 3481 nz = procsnz[i]; 3482 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3483 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3484 } 3485 /* read in the stuff for the last proc */ 3486 if (size != 1) { 3487 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3488 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3489 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3490 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3491 } 3492 ierr = PetscFree(cols);CHKERRQ(ierr); 3493 } else { 3494 /* determine buffer space needed for message */ 3495 nz = 0; 3496 for (i=0; i<m; i++) { 3497 nz += locrowlens[i]; 
3498 } 3499 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3500 mycols = ibuf; 3501 /* receive message of column indices*/ 3502 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3503 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3504 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3505 } 3506 3507 /* loop over local rows, determining number of off diagonal entries */ 3508 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3509 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3510 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3511 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3512 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3513 rowcount = 0; nzcount = 0; 3514 for (i=0; i<mbs; i++) { 3515 dcount = 0; 3516 odcount = 0; 3517 for (j=0; j<bs; j++) { 3518 kmax = locrowlens[rowcount]; 3519 for (k=0; k<kmax; k++) { 3520 tmp = mycols[nzcount++]/bs; 3521 if (!mask[tmp]) { 3522 mask[tmp] = 1; 3523 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3524 else masked1[dcount++] = tmp; 3525 } 3526 } 3527 rowcount++; 3528 } 3529 3530 dlens[i] = dcount; 3531 odlens[i] = odcount; 3532 3533 /* zero out the mask elements we set */ 3534 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3535 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3536 } 3537 3538 /* create our matrix */ 3539 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 3540 ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3541 ierr = MatSetType(A,type);CHKERRQ(ierr) 3542 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3543 3544 if (!rank) { 3545 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3546 /* read in my part of the matrix numerical values */ 3547 nz = procsnz[0]; 3548 vals = buf; 3549 mycols = ibuf; 3550 if (size == 1) nz 
-= extra_rows; 3551 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3552 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3553 3554 /* insert into matrix */ 3555 jj = rstart*bs; 3556 for (i=0; i<m; i++) { 3557 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3558 mycols += locrowlens[i]; 3559 vals += locrowlens[i]; 3560 jj++; 3561 } 3562 /* read in other processors (except the last one) and ship out */ 3563 for (i=1; i<size-1; i++) { 3564 nz = procsnz[i]; 3565 vals = buf; 3566 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3567 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3568 } 3569 /* the last proc */ 3570 if (size != 1){ 3571 nz = procsnz[i] - extra_rows; 3572 vals = buf; 3573 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3574 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3575 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 3576 } 3577 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3578 } else { 3579 /* receive numeric values */ 3580 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3581 3582 /* receive message of values*/ 3583 vals = buf; 3584 mycols = ibuf; 3585 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 3586 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3587 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3588 3589 /* insert into matrix */ 3590 jj = rstart*bs; 3591 for (i=0; i<m; i++) { 3592 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3593 mycols += locrowlens[i]; 3594 vals += locrowlens[i]; 3595 jj++; 3596 } 3597 } 3598 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3599 ierr = PetscFree(buf);CHKERRQ(ierr); 3600 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3601 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3602 ierr 
= PetscFree2(dlens,odlens);CHKERRQ(ierr);
  /* NOTE(review): this is the tail of a loader routine whose head lies above this
     chunk. It frees the preallocation scratch arrays, runs final parallel
     assembly, and returns the finished matrix through newmat -- confirm against
     the function head. */
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat - the matrix
.  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  /* Dispatch through the composed "MatSetHashTableFactor_C" function; if the
     matrix type did not compose one (i.e. it is not MPIBAIJ-like), this call
     is silently a no-op. */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
/* Type-specific implementation behind MatMPIBAIJSetHashTableFactor(): simply
   records the factor in the Mat_MPIBAIJ context. No validation of fact is
   performed here. */
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
/* Returns borrowed references to the two sequential pieces of an MPIBAIJ
   matrix: Ad = local diagonal block, Ao = local off-diagonal block, and the
   garray mapping from Ao's compressed (block) column numbering to global
   block columns. Caller must NOT destroy or free any of them.
   NOTE(review): A->data is cast without a type check, so calling this on a
   non-MPIBAIJ matrix is undefined behavior -- callers must guarantee the type. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
     Special version for direct calls from Fortran (to eliminate two
function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
/* FIX: was "matmpibiajsetvaluesblocked" -- a "biaj" transposition typo (and a
   missing trailing underscore), which made error tracebacks report a function
   name that exists nowhere in the source. */
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscTruth      roworiented  = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
  if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the per-matrix bs x bs staging buffer used when a block
     must be gathered from a strided layout before insertion */
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block row = skip (standard PETSc convention) */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already
           contiguous in v and barray can alias it directly */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required: gather the bs x bs block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal portion */
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-diagonal portion; after a previous assembly the columns of B
             are compressed, so translate via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* NOTE(review): a missing colmap entry yields a lookup of 0, so
               col = (0-1)/bs truncates to 0 for bs > 1 and this "not found"
               test only fires when bs == 1. Kept as-is to stay consistent with
               MatSetValuesBlocked_MPIBAIJ, of which this is a verbatim copy --
               verify against that routine before changing. */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process column entry: fall back to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}