#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
"MatSetValuesBlocked_MPIBAIJ" 277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 278 { 279 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 280 const PetscScalar *value; 281 MatScalar *barray=baij->barray; 282 PetscBool roworiented = baij->roworiented; 283 PetscErrorCode ierr; 284 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 285 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 286 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 287 288 PetscFunctionBegin; 289 if(!barray) { 290 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 291 baij->barray = barray; 292 } 293 294 if (roworiented) { 295 stepval = (n-1)*bs; 296 } else { 297 stepval = (m-1)*bs; 298 } 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) { 320 barray[jj] = value[jj]; 321 } 322 barray += bs; 323 } 324 barray -= bs2; 325 } 326 327 if (in[j] >= cstart && in[j] < cend){ 328 col = in[j] - cstart; 329 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 330 } 331 else if (in[j] < 0) continue; 332 #if defined(PETSC_USE_DEBUG) 333 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 334 #endif 335 else { 336 if (mat->was_assembled) { 337 if (!baij->colmap) { 338 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 339 } 340 341 #if defined(PETSC_USE_DEBUG) 342 #if defined (PETSC_USE_CTABLE) 343 { PetscInt data; 344 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 345 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 } 347 #else 348 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 349 #endif 350 #endif 351 #if defined (PETSC_USE_CTABLE) 352 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 353 col = (col - 1)/bs; 354 #else 355 col = (baij->colmap[in[j]] - 1)/bs; 356 #endif 357 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 358 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 359 col = in[j]; 360 } 361 } 362 else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  count = 0;
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
This does require more communication but still not more than 1147 the communication needed for the nonzero values */ 1148 nzmax = nz; /* space a largest processor needs */ 1149 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1150 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1151 cnt = 0; 1152 for (i=0; i<a->mbs; i++) { 1153 pcnt = cnt; 1154 for (j=B->i[i]; j<B->i[i+1]; j++) { 1155 if ( (col = garray[B->j[j]]) > cstart) break; 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*col+l; 1158 } 1159 } 1160 for (k=A->i[i]; k<A->i[i+1]; k++) { 1161 for (l=0; l<bs; l++) { 1162 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1163 } 1164 } 1165 for (; j<B->i[i+1]; j++) { 1166 for (l=0; l<bs; l++) { 1167 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1168 } 1169 } 1170 len = cnt - pcnt; 1171 for (k=1; k<bs; k++) { 1172 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1173 cnt += len; 1174 } 1175 } 1176 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1177 1178 /* store the columns to the file */ 1179 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1180 if (!rank) { 1181 MPI_Status status; 1182 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 for (i=1; i<size; i++) { 1184 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1185 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1186 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1190 } else { 1191 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1192 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1193 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1194 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1195 } 1196 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1197 1198 /* load up the numerical values */ 1199 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1200 cnt = 0; 1201 for (i=0; i<a->mbs; i++) { 1202 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1203 for (j=B->i[i]; j<B->i[i+1]; j++) { 1204 if ( garray[B->j[j]] > cstart) break; 1205 for (l=0; l<bs; l++) { 1206 for (ll=0; ll<bs; ll++) { 1207 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1208 } 1209 } 1210 cnt += bs; 1211 } 1212 for (k=A->i[i]; k<A->i[i+1]; k++) { 1213 for (l=0; l<bs; l++) { 1214 for (ll=0; ll<bs; ll++) { 1215 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1216 } 1217 } 1218 cnt += bs; 1219 } 1220 for (; j<B->i[i+1]; j++) { 1221 for (l=0; l<bs; l++) { 1222 for (ll=0; ll<bs; ll++) { 1223 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1224 } 1225 } 1226 cnt += bs; 1227 } 1228 cnt += (bs-1)*rlen; 1229 } 1230 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1231 1232 /* store the column values to the file */ 1233 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1234 if (!rank) { 1235 MPI_Status status; 1236 ierr = 
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
info->memory; isend[4] += info->mallocs; 1560 if (flag == MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. 
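The VecScatterBegin() below posts the communication of the ghost values of rr into baij->lvec; the diagonal block is scaled while those messages are in flight, and the matching VecScatterEnd() farther down completes the scatter just before the off-diagonal block is right-scaled with lvec.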
*/ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 const PetscScalar *xx; 1747 PetscScalar *bb; 1748 #if defined(PETSC_DEBUG) 1749 PetscBool found = PETSC_FALSE; 1750 #endif 1751 1752 PetscFunctionBegin; 1753 /* first count number of contributors to each processor */ 1754 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1755 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1757 j = 0; 1758 for (i=0; i<N; i++) { 1759 if (lastidx > (idx = rows[i])) j = 0; 1760 lastidx = idx; 1761 for (; j<size; j++) { 1762 if (idx >= owners[j] && idx < owners[j+1]) { 1763 nprocs[2*j]++; 1764 nprocs[2*j+1] = 1; 1765 owner[i] = j; 1766 #if defined(PETSC_DEBUG) 1767 found = PETSC_TRUE; 1768 #endif 1769 break; 1770 } 1771 } 1772 #if defined(PETSC_DEBUG) 1773 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1774 found = PETSC_FALSE; 1775 #endif 1776 } 1777 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1778 1779 if (A->nooffproczerorows) { 1780 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1781 nrecvs = nsends; 1782 nmax = N; 1783 } else { 1784 /* inform other processors of number of messages and max length*/ 1785 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1786 } 1787 1788 /* post receives: */ 1789 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1790 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1791 for (i=0; i<nrecvs; i++) { 1792 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1793 } 1794 1795 /* do sends: 1796 1) starts[i] gives the starting index in svalues for stuff going to 1797 the ith processor 1798 */ 1799 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1800 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1801 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1802 starts[0] = 0; 1803 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1804 for (i=0; i<N; i++) { 1805 svalues[starts[owner[i]]++] = rows[i]; 1806 } 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (nprocs[2*i+1]) { 1813 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]; 1819 1820 /* wait on receives */ 1821 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1822 count = nrecvs; 1823 slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* fix right hand side if needed */ 1850 if (x && b) { 1851 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 for (i=0; i<slen; i++) { 1854 bb[lrows[i]] = diag*xx[lrows[i]]; 1855 } 1856 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1858 } 1859 1860 /* actually zap the local rows */ 1861 /* 1862 Zero the required rows. If the "diagonal block" of the matrix 1863 is square and the user wishes to set the diagonal we use separate 1864 code so that MatSetValues() is not called for each diagonal allocating 1865 new memory, thus calling lots of mallocs and slowing things down. 
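Three cases follow: if diag is nonzero and the diagonal block is square, the value is passed directly to MatZeroRows_SeqBAIJ(); if diag is nonzero but the diagonal block is rectangular, the rows are zeroed first and the diagonal entries are then inserted one MatSetValues() call at a time, followed by a reassembly; if diag is zero, the rows are simply zeroed.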
1866 1867 */ 1868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1869 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1872 } else if (diag != 0.0) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1875 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1876 for (i=0; i<slen; i++) { 1877 row = lrows[i] + rstart_bs; 1878 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1884 } 1885 1886 ierr = PetscFree(lrows);CHKERRQ(ierr); 1887 1888 /* wait on sends */ 1889 if (nsends) { 1890 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1891 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1892 ierr = PetscFree(send_status);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1895 ierr = PetscFree(svalues);CHKERRQ(ierr); 1896 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1903 { 1904 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatEqual_MPIBAIJ" 1916 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1917 { 1918 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1919 Mat a,b,c,d; 1920 PetscBool flg; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 a = matA->A; b = matA->B; 1925 c = matB->A; d = matB->B; 1926 1927 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1928 if (flg) { 1929 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1930 } 1931 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_MPIBAIJ" 1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1941 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1942 1943 PetscFunctionBegin; 1944 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
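In that case, or when the nonzero patterns differ, we fall back to the generic MatCopy_Basic() path below; otherwise the diagonal and off-diagonal blocks are copied directly with MatCopy().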
*/ 1945 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } else { 1948 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1949 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1957 { 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1968 { 1969 PetscErrorCode ierr; 1970 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1971 PetscBLASInt bnz,one=1; 1972 Mat_SeqBAIJ *x,*y; 1973 1974 PetscFunctionBegin; 1975 if (str == SAME_NONZERO_PATTERN) { 1976 PetscScalar alpha = a; 1977 x = (Mat_SeqBAIJ *)xx->A->data; 1978 y = (Mat_SeqBAIJ *)yy->A->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 x = (Mat_SeqBAIJ *)xx->B->data; 1982 y = (Mat_SeqBAIJ *)yy->B->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else { 1986 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscInt rbs,cbs; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2001 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2004 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2005 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2018 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2037 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 IS iscol_local; 2041 PetscInt csize; 2042 2043 PetscFunctionBegin; 2044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2045 if (call == MAT_REUSE_MATRIX) { 2046 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2047 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2048 } else { 2049 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2050 } 2051 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2054 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2061 /* 2062 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2063 in local and then by concatenating the local matrices the end result. 2064 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2065 */ 2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size; 2070 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2071 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2072 Mat *local,M,Mreuse; 2073 MatScalar *vwork,*aa; 2074 MPI_Comm comm = ((PetscObject)mat)->comm; 2075 Mat_SeqBAIJ *aij; 2076 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 local = &Mreuse; 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2089 Mreuse = *local; 2090 ierr = PetscFree(local);CHKERRQ(ierr); 2091 } 2092 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else { 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,local_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp 
= colp; 2232 } else { 2233 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2234 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2239 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2240 if (size>1) { 2241 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2242 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2243 } 2244 /* now we just get the submatrix */ 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2248 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2254 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2255 { 2256 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2257 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2258 2259 PetscFunctionBegin; 2260 if (nghosts) { *nghosts = B->nbs;} 2261 if (ghosts) {*ghosts = baij->garray;} 2262 PetscFunctionReturn(0); 2263 } 2264 2265 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2269 /* 2270 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2271 */ 2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2275 PetscErrorCode ierr; 2276 PetscMPIInt size,*ncolsonproc,*disp,nn; 2277 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2278 const PetscInt *is; 2279 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2280 PetscInt *rowhit,M,cstart,cend,colb; 2281 PetscInt *columnsforrow,l; 2282 IS *isa; 2283 PetscBool done,flg; 2284 ISLocalToGlobalMapping map = mat->bmapping; 2285 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2286 2287 PetscFunctionBegin; 2288 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2289 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2290 2291 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2292 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2293 M = mat->rmap->n/bs; 2294 cstart = mat->cmap->rstart/bs; 2295 cend = mat->cmap->rend/bs; 2296 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2297 c->N = mat->cmap->N/bs; 2298 c->m = mat->rmap->n/bs; 2299 c->rstart = mat->rmap->rstart/bs; 2300 2301 c->ncolors = nis; 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2307 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2308 2309 /* Allow access to data structures of local part of matrix */ 2310 if (!baij->colmap) { 2311 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2312 } 2313 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2314 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2315 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2318 2319 for (i=0; i<nis; i++) { 2320 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2321 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2322 c->ncolumns[i] = n; 2323 if (n) { 2324 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2325 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 } else { 2328 c->columns[i] = 0; 2329 } 2330 2331 if (ctype == IS_COLORING_GLOBAL){ 2332 /* Determine the total (parallel) number of columns of this color */ 2333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2334 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2335 2336 nn = PetscMPIIntCast(n); 2337 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2339 if (!nctot) { 2340 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2341 } 2342 2343 disp[0] = 0; 2344 for (j=1; j<size; j++) { 2345 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2346 } 2347 2348 /* Get complete list of columns for color on each processor */ 2349 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2351 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2352 } else if (ctype == IS_COLORING_GHOSTED){ 2353 /* Determine local number of columns of this color on this process, including ghost points */ 2354 nctot = n; 2355 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2357 } else { 2358 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2359 } 2360 2361 /* 2362 Mark all rows affect by these columns 2363 */ 2364 /* Temporary option to allow for debugging/testing */ 2365 flg = PETSC_FALSE; 2366 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2367 if (!flg) {/*-----------------------------------------------------------------------------*/ 2368 /* crude, fast version */ 2369 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2370 /* loop over columns*/ 2371 for (j=0; j<nctot; j++) { 2372 if (ctype == IS_COLORING_GHOSTED) { 2373 col = ltog[cols[j]]; 2374 } else { 2375 col = cols[j]; 2376 } 2377 if (col >= cstart && col < cend) { 2378 /* column is in diagonal block of matrix */ 2379 rows = A_cj + A_ci[col-cstart]; 2380 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2381 } else { 2382 #if defined (PETSC_USE_CTABLE) 2383 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2384 colb --; 2385 #else 2386 colb = baij->colmap[col] - 1; 2387 #endif 2388 if (colb == -1) { 2389 m = 0; 2390 } else { 2391 colb = colb/bs; 2392 rows = B_cj + B_ci[colb]; 2393 m = B_ci[colb+1] - B_ci[colb]; 2394 } 2395 } 2396 /* loop over columns marking them in rowhit */ 2397 for (k=0; k<m; k++) { 2398 rowhit[*rows++] = col + 1; 2399 } 2400 } 2401 2402 /* count the number of hits */ 2403 nrows = 0; 2404 for (j=0; j<M; j++) { 2405 if (rowhit[j]) nrows++; 2406 } 2407 c->nrows[i] = nrows; 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2410 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2411 nrows = 0; 2412 for (j=0; j<M; j++) { 2413 if (rowhit[j]) { 2414 c->rows[i][nrows] = j; 2415 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2416 nrows++; 2417 } 2418 } 2419 } else {/*-------------------------------------------------------------------------------*/ 2420 /* slow version, using rowhit as a linked list */ 2421 PetscInt currentcol,fm,mfm; 2422 rowhit[M] = M; 2423 nrows = 0; 2424 /* loop over columns*/ 2425 for (j=0; j<nctot; j++) { 2426 if (ctype == IS_COLORING_GHOSTED) { 2427 col = ltog[cols[j]]; 2428 } else { 2429 col = cols[j]; 2430 } 2431 if (col >= cstart && col < cend) { 2432 /* column is in diagonal block of matrix */ 2433 rows = A_cj + A_ci[col-cstart]; 2434 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2435 } else { 2436 #if defined (PETSC_USE_CTABLE) 2437 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2438 colb --; 2439 #else 2440 colb = baij->colmap[col] - 1; 2441 #endif 2442 if (colb == -1) { 2443 m = 0; 2444 } else { 2445 colb = colb/bs; 2446 rows = B_cj + B_ci[colb]; 2447 m = B_ci[colb+1] - B_ci[colb]; 2448 } 2449 } 2450 2451 /* loop over columns marking them in rowhit */ 2452 fm = M; /* fm points to first entry in linked list */ 2453 for (k=0; k<m; k++) { 2454 currentcol = *rows++; 2455 /* is it already in the list? 
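Walk the singly linked list kept in rowhit (the sentinel rowhit[M] = M terminates it) until an entry that is not smaller than currentcol is reached; mfm trails one step behind so a new row can be spliced in after it.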
*/ 2456 do { 2457 mfm = fm; 2458 fm = rowhit[fm]; 2459 } while (fm < currentcol); 2460 /* not in list so add it */ 2461 if (fm != currentcol) { 2462 nrows++; 2463 columnsforrow[currentcol] = col; 2464 /* next three lines insert new entry into linked list */ 2465 rowhit[mfm] = currentcol; 2466 rowhit[currentcol] = fm; 2467 fm = currentcol; 2468 /* fm points to present position in list since we know the columns are sorted */ 2469 } else { 2470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2471 } 2472 } 2473 } 2474 c->nrows[i] = nrows; 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 /* now store the linked list of rows into c->rows[i] */ 2479 nrows = 0; 2480 fm = rowhit[M]; 2481 do { 2482 c->rows[i][nrows] = fm; 2483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2484 fm = rowhit[fm]; 2485 } while (fm < M); 2486 } /* ---------------------------------------------------------------------------------------*/ 2487 ierr = PetscFree(cols);CHKERRQ(ierr); 2488 } 2489 2490 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2491 /* 2492 vscale will contain the "diagonal" on processor scalings followed by the off processor 2493 */ 2494 if (ctype == IS_COLORING_GLOBAL) { 2495 PetscInt *garray; 2496 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2497 for (i=0; i<baij->B->cmap->n/bs; i++) { 2498 for (j=0; j<bs; j++) { 2499 garray[i*bs+j] = bs*baij->garray[i]+j; 2500 } 2501 } 2502 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2503 ierr = PetscFree(garray);CHKERRQ(ierr); 2504 CHKMEMQ; 2505 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2506 for (k=0; k<c->ncolors; k++) { 2507 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2508 for (l=0; l<c->nrows[k]; l++) { 2509 col = c->columnsforrow[k][l]; 2510 if (col >= cstart && col < cend) { 2511 /* column is in diagonal block of matrix */ 2512 colb = col - cstart; 2513 } else { 2514 /* column is in "off-processor" part */ 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2517 colb --; 2518 #else 2519 colb = baij->colmap[col] - 1; 2520 #endif 2521 colb = colb/bs; 2522 colb += cend - cstart; 2523 } 2524 c->vscaleforrow[k][l] = colb; 2525 } 2526 } 2527 } else if (ctype == IS_COLORING_GHOSTED) { 2528 /* Get gtol mapping */ 2529 PetscInt N = mat->cmap->N, *gtol; 2530 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2531 for (i=0; i<N; i++) gtol[i] = -1; 2532 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2533 2534 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2535 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2536 for (k=0; k<c->ncolors; k++) { 2537 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2538 for (l=0; l<c->nrows[k]; l++) { 2539 col = c->columnsforrow[k][l]; /* global column index */ 2540 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2541 } 2542 } 2543 ierr = PetscFree(gtol);CHKERRQ(ierr); 2544 } 2545 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2546 2547 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2548 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2549
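/*
   A small worked example of the vscaleforrow mapping built above for the
   IS_COLORING_GLOBAL case (the numbers are illustrative only): with bs = 2,
   cstart = 10 and cend = 20 (block indices), a color column col = 14 lies in
   the diagonal block and gets colb = col - cstart = 4; an off-process block
   column col = 42 that is the seventh ghost block (garray index 6) is found
   through baij->colmap as 6*bs+1 = 13, shifted to 12, divided by bs to recover
   the block index 6, and finally offset by cend - cstart to give colb = 16,
   i.e. ghost columns are numbered after the cend - cstart owned block columns
   of c->vscale.
*/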
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2551 CHKMEMQ; 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2557 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2558 { 2559 Mat B; 2560 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2561 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2562 Mat_SeqAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2565 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2566 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2567 2568 PetscFunctionBegin; 2569 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2571 2572 /* ---------------------------------------------------------------- 2573 Tell every processor the number of nonzeros per row 2574 */ 2575 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2576 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2577 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2578 } 2579 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2581 displs = recvcounts + size; 2582 for (i=0; i<size; i++) { 2583 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2584 displs[i] = A->rmap->range[i]/bs; 2585 } 2586 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2587 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #else 2589 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #endif 2591 /* --------------------------------------------------------------- 2592 Create the sequential matrix of the same type as the local block diagonal 2593 */ 2594 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2596 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2597 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2598 b = (Mat_SeqAIJ *)B->data; 2599 2600 /*-------------------------------------------------------------------- 2601 Copy my part of matrix column indices over 2602 */ 2603 sendcount = ad->nz + bd->nz; 2604 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2605 a_jsendbuf = ad->j; 2606 b_jsendbuf = bd->j; 2607 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2608 cnt = 0; 2609 for (i=0; i<n; i++) { 2610 2611 /* put in lower diagonal portion */ 2612 m = bd->i[i+1] - bd->i[i]; 2613 while (m > 0) { 2614 /* is it above diagonal (in bd (compressed) numbering) */ 2615 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2616 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2617 m--; 2618 } 2619 2620 /* put in diagonal portion */ 2621 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2622 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2623 } 2624 2625 /* put in upper diagonal portion */ 2626 while (m-- > 0) { 2627 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2628 } 2629 } 2630 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2631 2632 /*-------------------------------------------------------------------- 2633 Gather all column indices to all processors 2634 */ 2635 for (i=0; i<size; i++) { 2636 recvcounts[i] = 0; 2637 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2638 recvcounts[i] += lens[j]; 2639 } 2640 } 2641 displs[0] = 0; 2642 for (i=1; i<size; i++) { 2643 displs[i] = displs[i-1] + recvcounts[i-1]; 2644 } 2645 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2646 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #else 2648 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #endif 2650 /*-------------------------------------------------------------------- 2651 Assemble the matrix into useable form (note numerical values not yet set) 2652 */ 2653 /* set the b->ilen (length of each row) values */ 2654 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* set the b->i indices */ 2656 b->i[0] = 0; 2657 for (i=1; i<=A->rmap->N/bs; i++) { 2658 b->i[i] = b->i[i-1] + lens[i-1]; 2659 } 2660 ierr = PetscFree(lens);CHKERRQ(ierr); 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2664 2665 if (A->symmetric){ 2666 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->hermitian) { 2668 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->structurally_symmetric) { 2670 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2671 } 2672 *newmat = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSOR_MPIBAIJ" 2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2679 { 2680 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2681 PetscErrorCode ierr; 2682 Vec bb1 = 0; 2683 2684 PetscFunctionBegin; 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if (flag == SOR_APPLY_UPPER) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 while (its--) { 2701 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 2704 /* update rhs: bb1 = bb - B*x */ 2705 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2706 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2707 2708 /* local sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2710 } 2711 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2712 if (flag & SOR_ZERO_INITIAL_GUESS) { 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2714 its--; 2715 } 2716 while (its--) { 2717 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 2720 /* update rhs: bb1 = bb - B*x */ 2721 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2722 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2723 2724 /* local sweep */ 2725 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2726 } 2727 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2728 if (flag & SOR_ZERO_INITIAL_GUESS) { 2729 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2730 its--; 2731 } 2732 while (its--) { 2733 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 2736 /* update rhs: bb1 = bb - B*x */ 2737 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2738 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2739 2740 /* local sweep */ 2741 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2742 } 2743 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2744 2745 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2746 PetscFunctionReturn(0); 2747 } 2748 2749 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2750 2751 2752 /* -------------------------------------------------------------------*/ 2753 static struct _MatOps MatOps_Values = { 2754 MatSetValues_MPIBAIJ, 2755 MatGetRow_MPIBAIJ, 2756 MatRestoreRow_MPIBAIJ, 2757 MatMult_MPIBAIJ, 2758 /* 4*/ MatMultAdd_MPIBAIJ, 2759 MatMultTranspose_MPIBAIJ, 2760 MatMultTransposeAdd_MPIBAIJ, 2761 0, 2762 0, 2763 0, 2764 /*10*/ 0, 2765 0, 2766 0, 2767 MatSOR_MPIBAIJ, 2768 MatTranspose_MPIBAIJ, 2769 /*15*/ MatGetInfo_MPIBAIJ, 2770 MatEqual_MPIBAIJ, 2771 MatGetDiagonal_MPIBAIJ, 2772 MatDiagonalScale_MPIBAIJ, 2773 MatNorm_MPIBAIJ, 2774 /*20*/ MatAssemblyBegin_MPIBAIJ, 2775 MatAssemblyEnd_MPIBAIJ, 2776 MatSetOption_MPIBAIJ, 2777 MatZeroEntries_MPIBAIJ, 2778 /*24*/ MatZeroRows_MPIBAIJ, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*34*/ MatDuplicate_MPIBAIJ, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*39*/ MatAXPY_MPIBAIJ, 2794 MatGetSubMatrices_MPIBAIJ, 2795 MatIncreaseOverlap_MPIBAIJ, 2796 MatGetValues_MPIBAIJ, 2797 MatCopy_MPIBAIJ, 2798 /*44*/ 0, 2799 MatScale_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 /*49*/ MatSetBlockSize_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*54*/ MatFDColoringCreate_MPIBAIJ, 2809 0, 2810 MatSetUnfactored_MPIBAIJ, 2811 MatPermute_MPIBAIJ, 2812 MatSetValuesBlocked_MPIBAIJ, 2813 /*59*/ MatGetSubMatrix_MPIBAIJ, 2814 MatDestroy_MPIBAIJ, 2815 MatView_MPIBAIJ, 2816 0, 2817 0, 2818 /*64*/ 0, 2819 0, 2820 0, 2821 0, 2822 0, 2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2824 0, 2825 0, 2826 0, 2827 0, 2828 /*74*/ 0, 2829 MatFDColoringApply_BAIJ, 2830 0, 2831 0, 2832 0, 2833 /*79*/ 0, 2834 0, 2835 0, 2836 0, 2837 MatLoad_MPIBAIJ, 2838 /*84*/ 0, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /*89*/ 0, 2844 0, 2845 0, 2846 0, 2847 0, 2848 /*94*/ 0, 2849 0, 2850 0, 2851 0, 2852 0, 2853 /*99*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 /*104*/0, 2859 MatRealPart_MPIBAIJ, 2860 MatImaginaryPart_MPIBAIJ, 2861 0, 2862 0, 2863 /*109*/0, 2864 0, 2865 0, 2866 0, 2867 0, 2868 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2869 0, 2870 MatGetGhosts_MPIBAIJ, 2871 0, 2872 0, 2873 /*119*/0, 2874 0, 2875 0, 2876 0 
2877 }; 2878 2879 EXTERN_C_BEGIN 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2882 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2883 { 2884 PetscFunctionBegin; 2885 *a = ((Mat_MPIBAIJ *)A->data)->A; 2886 *iscopy = PETSC_FALSE; 2887 PetscFunctionReturn(0); 2888 } 2889 EXTERN_C_END 2890 2891 EXTERN_C_BEGIN 2892 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2893 EXTERN_C_END 2894 2895 EXTERN_C_BEGIN 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2899 { 2900 PetscInt m,rstart,cstart,cend; 2901 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2902 const PetscInt *JJ=0; 2903 PetscScalar *values=0; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 2908 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2909 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2912 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2913 m = B->rmap->n/bs; 2914 rstart = B->rmap->rstart/bs; 2915 cstart = B->cmap->rstart/bs; 2916 cend = B->cmap->rend/bs; 2917 2918 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2919 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2920 for (i=0; i<m; i++) { 2921 nz = ii[i+1] - ii[i]; 2922 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2923 nz_max = PetscMax(nz_max,nz); 2924 JJ = jj + ii[i]; 2925 for (j=0; j<nz; j++) { 2926 if (*JJ >= cstart) break; 2927 JJ++; 2928 } 2929 d = 0; 2930 for (; j<nz; j++) { 2931 if (*JJ++ >= cend) break; 2932 d++; 2933 } 2934 d_nnz[i] = d; 2935 o_nnz[i] = nz - d; 2936 } 2937 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2938 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2939 2940 values = (PetscScalar*)V; 2941 if (!values) { 2942 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2943 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2944 } 2945 for (i=0; i<m; i++) { 2946 PetscInt row = i + rstart; 2947 PetscInt ncols = ii[i+1] - ii[i]; 2948 const PetscInt *icols = jj + ii[i]; 2949 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2950 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2951 } 2952 2953 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2954 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2956 2957 PetscFunctionReturn(0); 2958 } 2959 EXTERN_C_END 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2963 /*@C 2964 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2965 (the default parallel PETSc format). 2966 2967 Collective on MPI_Comm 2968 2969 Input Parameters: 2970 + A - the matrix 2971 . bs - the block size 2972 . i - the indices into j for the start of each local row (starts with zero) 2973 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2974 - v - optional values in the matrix 2975 2976 Level: developer 2977 2978 .keywords: matrix, aij, compressed row, sparse, parallel 2979 2980 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2981 @*/ 2982 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2983 { 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 EXTERN_C_BEGIN 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2995 { 2996 Mat_MPIBAIJ *b; 2997 PetscErrorCode ierr; 2998 PetscInt i, newbs = PetscAbs(bs); 2999 3000 PetscFunctionBegin; 3001 if (bs < 0) { 3002 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3003 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3004 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3005 bs = PetscAbs(bs); 3006 } 3007 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3008 bs = newbs; 3009 3010 3011 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3012 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3013 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3014 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3015 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3016 3017 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3020 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3021 3022 if (d_nnz) { 3023 for (i=0; i<B->rmap->n/bs; i++) { 3024 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3025 } 3026 } 3027 if (o_nnz) { 3028 for (i=0; i<B->rmap->n/bs; i++) { 3029 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3030 } 3031 } 3032 3033 b = (Mat_MPIBAIJ*)B->data; 3034 b->bs2 = bs*bs; 3035 b->mbs = B->rmap->n/bs; 3036 b->nbs = B->cmap->n/bs; 3037 b->Mbs = B->rmap->N/bs; 3038 b->Nbs = B->cmap->N/bs; 3039 3040 for (i=0; i<=b->size; i++) { 3041 b->rangebs[i] = B->rmap->range[i]/bs; 3042 } 3043 b->rstartbs = B->rmap->rstart/bs; 3044 b->rendbs = B->rmap->rend/bs; 3045 b->cstartbs = B->cmap->rstart/bs; 3046 b->cendbs = B->cmap->rend/bs; 3047 3048 if (!B->preallocated) { 3049 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3050 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3051 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3052 ierr = 
PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3053 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3054 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3055 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3056 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3057 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3058 } 3059 3060 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3061 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3062 B->preallocated = PETSC_TRUE; 3063 PetscFunctionReturn(0); 3064 } 3065 EXTERN_C_END 3066 3067 EXTERN_C_BEGIN 3068 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3069 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3070 EXTERN_C_END 3071 3072 3073 EXTERN_C_BEGIN 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3076 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3077 { 3078 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3079 PetscErrorCode ierr; 3080 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3081 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3082 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3083 3084 PetscFunctionBegin; 3085 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3086 ii[0] = 0; 3087 CHKMEMQ; 3088 for (i=0; i<M; i++) { 3089 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3090 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3091 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3092 /* remove one from the count if this row of the diagonal block has a diagonal entry */ 3093 for (j=id[i]; j<id[i+1]; j++) { 3094 if (jd[j] == i) {ii[i+1]--;break;} 3095 } 3096 CHKMEMQ; 3097 } 3098 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3099 cnt = 0; 3100 for (i=0; i<M; i++) { 3101 for (j=io[i]; j<io[i+1]; j++) { 3102 if (garray[jo[j]] > rstart) break; 3103 jj[cnt++] = garray[jo[j]]; 3104 CHKMEMQ; 3105 } 3106 for (k=id[i]; k<id[i+1]; k++) { 3107 if (jd[k] != i) { 3108 jj[cnt++] = rstart + jd[k]; 3109 CHKMEMQ; 3110 } 3111 } 3112 for (;j<io[i+1]; j++) { 3113 jj[cnt++] = garray[jo[j]]; 3114 CHKMEMQ; 3115 } 3116 } 3117 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3118 PetscFunctionReturn(0); 3119 } 3120 EXTERN_C_END 3121 3122 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3123 EXTERN_C_BEGIN 3124 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3125 EXTERN_C_END 3126 3127 EXTERN_C_BEGIN 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3130 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3131 { 3132 PetscErrorCode ierr; 3133 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3134 Mat B; 3135 Mat_MPIAIJ *b; /* set below, once B has been created and preallocated */ 3136 3137 PetscFunctionBegin; 3138 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3139 3140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3141 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3142 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3143 ierr =
MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3144 b = (Mat_MPIAIJ*) B->data; 3145 3146 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3147 ierr = MatDestroy(b->B);CHKERRQ(ierr); 3148 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3150 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3151 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3155 if (reuse == MAT_REUSE_MATRIX) { 3156 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3157 } else { 3158 *newmat = B; 3159 } 3160 PetscFunctionReturn(0); 3161 } 3162 EXTERN_C_END 3163 3164 EXTERN_C_BEGIN 3165 #if defined(PETSC_HAVE_MUMPS) 3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3167 #endif 3168 EXTERN_C_END 3169 3170 /*MC 3171 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3172 3173 Options Database Keys: 3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3175 . -mat_block_size <bs> - set the blocksize used to store the matrix 3176 - -mat_use_hash_table <fact> 3177 3178 Level: beginner 3179 3180 .seealso: MatCreateMPIBAIJ 3181 M*/ 3182 3183 EXTERN_C_BEGIN 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatCreate_MPIBAIJ" 3186 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B) 3187 { 3188 Mat_MPIBAIJ *b; 3189 PetscErrorCode ierr; 3190 PetscBool flg; 3191 3192 PetscFunctionBegin; 3193 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3194 B->data = (void*)b; 3195 3196 3197 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3198 B->mapping = 0; 3199 B->assembled = PETSC_FALSE; 3200 3201 B->insertmode = NOT_SET_VALUES; 3202 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3203 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3204 3205 /* build local table of row and column ownerships */ 3206 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3207 3208 /* build cache for off array entries formed */ 3209 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3210 b->donotstash = PETSC_FALSE; 3211 b->colmap = PETSC_NULL; 3212 b->garray = PETSC_NULL; 3213 b->roworiented = PETSC_TRUE; 3214 3215 /* stuff used in block assembly */ 3216 b->barray = 0; 3217 3218 /* stuff used for matrix vector multiply */ 3219 b->lvec = 0; 3220 b->Mvctx = 0; 3221 3222 /* stuff for MatGetRow() */ 3223 b->rowindices = 0; 3224 b->rowvalues = 0; 3225 b->getrowactive = PETSC_FALSE; 3226 3227 /* hash table stuff */ 3228 b->ht = 0; 3229 b->hd = 0; 3230 b->ht_size = 0; 3231 b->ht_flag = PETSC_FALSE; 3232 b->ht_fact = 0; 3233 b->ht_total_ct = 0; 3234 b->ht_insert_ct = 0; 3235 3236 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3237 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3238 if (flg) { 3239 PetscReal fact = 1.39; 3240 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3241 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table 
factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3242 if (fact <= 1.0) fact = 1.39; 3243 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3244 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3245 } 3246 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3247 3248 #if defined(PETSC_HAVE_MUMPS) 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3250 #endif 3251 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3252 "MatConvert_MPIBAIJ_MPIAdj", 3253 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3255 "MatConvert_MPIBAIJ_MPIAIJ", 3256 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3257 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3258 "MatStoreValues_MPIBAIJ", 3259 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3261 "MatRetrieveValues_MPIBAIJ", 3262 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3264 "MatGetDiagonalBlock_MPIBAIJ", 3265 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3267 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3268 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3270 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3271 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3273 "MatDiagonalScaleLocal_MPIBAIJ", 3274 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3276 "MatSetHashTableFactor_MPIBAIJ", 3277 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3278 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 EXTERN_C_END 3282 3283 /*MC 3284 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3285 3286 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3287 and MATMPIBAIJ otherwise. 3288 3289 Options Database Keys: 3290 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3291 3292 Level: beginner 3293 3294 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3295 M*/ 3296 3297 EXTERN_C_BEGIN 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatCreate_BAIJ" 3300 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A) 3301 { 3302 PetscErrorCode ierr; 3303 PetscMPIInt size; 3304 3305 PetscFunctionBegin; 3306 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3307 if (size == 1) { 3308 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3309 } else { 3310 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3311 } 3312 PetscFunctionReturn(0); 3313 } 3314 EXTERN_C_END 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3318 /*@C 3319 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3320 (block compressed row). 
For good matrix assembly performance 3321 the user should preallocate the matrix storage by setting the parameters 3322 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3323 performance can be increased by more than a factor of 50. 3324 3325 Collective on Mat 3326 3327 Input Parameters: 3328 + A - the matrix 3329 . bs - size of block 3330 . d_nz - number of block nonzeros per block row in diagonal portion of local 3331 submatrix (same for all local rows) 3332 . d_nnz - array containing the number of block nonzeros in the various block rows 3333 in the diagonal portion of the local submatrix (possibly different for each block 3334 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3335 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3336 submatrix (same for all local rows). 3337 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3338 off-diagonal portion of the local submatrix (possibly different for 3339 each block row) or PETSC_NULL. 3340 3341 If the *_nnz parameter is given then the *_nz parameter is ignored. 3342 3343 Options Database Keys: 3344 + -mat_block_size - size of the blocks to use 3345 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory, sized by the factor <fact> 3346 3347 Notes: 3348 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3349 then it must be used on all processors that share the object for that argument. 3350 3351 Storage Information: 3352 For a square global matrix we define each processor's diagonal portion 3353 to be its local rows and the corresponding columns (a square submatrix); 3354 each processor's off-diagonal portion encompasses the remainder of the 3355 local matrix (a rectangular submatrix). 3356 3357 The user can specify preallocated storage for the diagonal part of 3358 the local submatrix with either d_nz or d_nnz (not both). Set 3359 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3360 memory allocation. Likewise, specify preallocated storage for the 3361 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3362 3363 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3364 the figure below we depict these three local rows and all columns (0-11). 3365 3366 .vb 3367 0 1 2 3 4 5 6 7 8 9 10 11 3368 ------------------- 3369 row 3 | o o o d d d o o o o o o 3370 row 4 | o o o d d d o o o o o o 3371 row 5 | o o o d d d o o o o o o 3372 ------------------- 3373 .ve 3374 3375 Thus, any entries in the d locations are stored in the d (diagonal) 3376 submatrix, and any entries in the o locations are stored in the 3377 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3378 stored simply in the MATSEQBAIJ format for compressed row storage. 3379 3380 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3381 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3382 In general, for PDE problems in which most nonzeros are near the diagonal, 3383 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3384 or you will get TERRIBLE performance; see the users' manual chapter on 3385 matrices. 3386 3387 You can call MatGetInfo() to get information on how effective the preallocation was; 3388 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3389 You can also run with the option -info and look for messages with the string 3390 malloc in them to see if additional memory allocation was needed.
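
   As a minimal sketch only (the block size, local size, and per-row block counts below are
   illustrative assumptions, not recommendations), the usual calling sequence with per-row
   preallocation looks like

.vb
   Mat      A;
   PetscInt bs = 2, mbs = 10;      /* mbs = assumed number of local block rows */
   PetscInt d_nnz[10], o_nnz[10];  /* to be filled with the expected number of nonzero BLOCKS
                                      per local block row in the diagonal and off-diagonal
                                      parts, respectively */
   MatCreate(comm,&A);             /* comm is the communicator the matrix lives on */
   MatSetSizes(A,mbs*bs,mbs*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
   /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve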
3391 3392 Level: intermediate 3393 3394 .keywords: matrix, block, aij, compressed row, sparse, parallel 3395 3396 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3397 @*/ 3398 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3399 { 3400 PetscErrorCode ierr; 3401 3402 PetscFunctionBegin; 3403 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3404 PetscFunctionReturn(0); 3405 } 3406 3407 #undef __FUNCT__ 3408 #define __FUNCT__ "MatCreateMPIBAIJ" 3409 /*@C 3410 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3411 (block compressed row). For good matrix assembly performance 3412 the user should preallocate the matrix storage by setting the parameters 3413 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3414 performance can be increased by more than a factor of 50. 3415 3416 Collective on MPI_Comm 3417 3418 Input Parameters: 3419 + comm - MPI communicator 3420 . bs - size of block 3421 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3422 This value should be the same as the local size used in creating the 3423 y vector for the matrix-vector product y = Ax. 3424 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3425 This value should be the same as the local size used in creating the 3426 x vector for the matrix-vector product y = Ax. 3427 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3428 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3429 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3430 submatrix (same for all local rows) 3431 . d_nnz - array containing the number of nonzero blocks in the various block rows 3432 in the diagonal portion of the local submatrix (possibly different for each block 3433 row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 3434 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3435 submatrix (same for all local rows). 3436 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3437 off-diagonal portion of the local submatrix (possibly different for 3438 each block row) or PETSC_NULL. 3439 3440 Output Parameter: 3441 . A - the matrix 3442 3443 Options Database Keys: 3444 + -mat_block_size - size of the blocks to use 3445 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory, sized by the factor <fact> 3446 3447 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3448 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3449 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3450 3451 Notes: 3452 If the *_nnz parameter is given then the *_nz parameter is ignored. 3453 3454 A nonzero block is any block that has 1 or more nonzeros in it 3455 3456 The user MUST specify either the local or global matrix dimensions 3457 (possibly both). 3458 3459 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3460 then it must be used on all processors that share the object for that argument.
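
   A minimal usage sketch (the communicator, block size, local size, and nonzero estimates
   below are illustrative assumptions, not recommended values):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,3,30,30,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve

   When better estimates are available, pass d_nnz/o_nnz arrays with one entry per local block
   row instead of the scalar d_nz/o_nz values (see the Storage Information below).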
3461 3462 Storage Information: 3463 For a square global matrix we define each processor's diagonal portion 3464 to be its local rows and the corresponding columns (a square submatrix); 3465 each processor's off-diagonal portion encompasses the remainder of the 3466 local matrix (a rectangular submatrix). 3467 3468 The user can specify preallocated storage for the diagonal part of 3469 the local submatrix with either d_nz or d_nnz (not both). Set 3470 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3471 memory allocation. Likewise, specify preallocated storage for the 3472 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3473 3474 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3475 the figure below we depict these three local rows and all columns (0-11). 3476 3477 .vb 3478 0 1 2 3 4 5 6 7 8 9 10 11 3479 ------------------- 3480 row 3 | o o o d d d o o o o o o 3481 row 4 | o o o d d d o o o o o o 3482 row 5 | o o o d d d o o o o o o 3483 ------------------- 3484 .ve 3485 3486 Thus, any entries in the d locations are stored in the d (diagonal) 3487 submatrix, and any entries in the o locations are stored in the 3488 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3489 stored simply in the MATSEQBAIJ format for compressed row storage. 3490 3491 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3492 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3493 In general, for PDE problems in which most nonzeros are near the diagonal, 3494 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3495 or you will get TERRIBLE performance; see the users' manual chapter on 3496 matrices. 3497 3498 Level: intermediate 3499 3500 .keywords: matrix, block, aij, compressed row, sparse, parallel 3501 3502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3503 @*/ 3504 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3505 { 3506 PetscErrorCode ierr; 3507 PetscMPIInt size; 3508 3509 PetscFunctionBegin; 3510 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3511 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3512 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3513 if (size > 1) { 3514 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3515 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3516 } else { 3517 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3518 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3519 } 3520 PetscFunctionReturn(0); 3521 } 3522 3523 #undef __FUNCT__ 3524 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3525 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3526 { 3527 Mat mat; 3528 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3529 PetscErrorCode ierr; 3530 PetscInt len=0; 3531 3532 PetscFunctionBegin; 3533 *newmat = 0; 3534 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3535 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3536 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3537 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3538 3539 mat->factortype = matin->factortype; 3540 
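  /* the duplicate is created already preallocated and assembled; the block-size metadata and
     the parallel layouts are copied from the original matrix below */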
mat->preallocated = PETSC_TRUE; 3541 mat->assembled = PETSC_TRUE; 3542 mat->insertmode = NOT_SET_VALUES; 3543 3544 a = (Mat_MPIBAIJ*)mat->data; 3545 mat->rmap->bs = matin->rmap->bs; 3546 a->bs2 = oldmat->bs2; 3547 a->mbs = oldmat->mbs; 3548 a->nbs = oldmat->nbs; 3549 a->Mbs = oldmat->Mbs; 3550 a->Nbs = oldmat->Nbs; 3551 3552 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3553 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3554 3555 a->size = oldmat->size; 3556 a->rank = oldmat->rank; 3557 a->donotstash = oldmat->donotstash; 3558 a->roworiented = oldmat->roworiented; 3559 a->rowindices = 0; 3560 a->rowvalues = 0; 3561 a->getrowactive = PETSC_FALSE; 3562 a->barray = 0; 3563 a->rstartbs = oldmat->rstartbs; 3564 a->rendbs = oldmat->rendbs; 3565 a->cstartbs = oldmat->cstartbs; 3566 a->cendbs = oldmat->cendbs; 3567 3568 /* hash table stuff */ 3569 a->ht = 0; 3570 a->hd = 0; 3571 a->ht_size = 0; 3572 a->ht_flag = oldmat->ht_flag; 3573 a->ht_fact = oldmat->ht_fact; 3574 a->ht_total_ct = 0; 3575 a->ht_insert_ct = 0; 3576 3577 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3578 if (oldmat->colmap) { 3579 #if defined (PETSC_USE_CTABLE) 3580 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3581 #else 3582 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3583 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3584 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3585 #endif 3586 } else a->colmap = 0; 3587 3588 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3589 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3590 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3591 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3592 } else a->garray = 0; 3593 3594 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3595 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3596 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3597 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3598 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3599 3600 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3602 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3603 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3604 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3605 *newmat = mat; 3606 3607 PetscFunctionReturn(0); 3608 } 3609 3610 #undef __FUNCT__ 3611 #define __FUNCT__ "MatLoad_MPIBAIJ" 3612 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3613 { 3614 PetscErrorCode ierr; 3615 int fd; 3616 PetscInt i,nz,j,rstart,rend; 3617 PetscScalar *vals,*buf; 3618 MPI_Comm comm = ((PetscObject)viewer)->comm; 3619 MPI_Status status; 3620 PetscMPIInt rank,size,maxnz; 3621 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3622 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3623 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3624 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3625 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3626 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3627 3628 
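  /*
     Overall flow: process 0 reads the binary file and ships pieces to the other processes.
       1. read and broadcast the header; reconcile the sizes with any already set on newmat
       2. pad with extra rows (a single 1.0 on the diagonal) if needed so the row count is
          divisible by the block size
       3. distribute the row lengths, then the column indices, to the owning processes
       4. each process counts diagonal/off-diagonal blocks per block row and preallocates
       5. the numerical values are read, shipped, and inserted row by row, then the matrix
          is assembled
  */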
PetscFunctionBegin; 3629 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3630 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3631 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3632 3633 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3634 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3635 if (!rank) { 3636 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3637 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3638 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3639 } 3640 3641 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3642 3643 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3644 M = header[1]; N = header[2]; 3645 3646 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3647 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3648 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3649 3650 /* If global sizes are set, check if they are consistent with that given in the file */ 3651 if (sizesset) { 3652 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3653 } 3654 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3655 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3656 3657 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3658 3659 /* 3660 This code adds extra rows to make sure the number of rows is 3661 divisible by the blocksize 3662 */ 3663 Mbs = M/bs; 3664 extra_rows = bs - M + bs*Mbs; 3665 if (extra_rows == bs) extra_rows = 0; 3666 else Mbs++; 3667 if (extra_rows && !rank) { 3668 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3669 } 3670 3671 /* determine ownership of all rows */ 3672 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3673 mbs = Mbs/size + ((Mbs % size) > rank); 3674 m = mbs*bs; 3675 } else { /* User set */ 3676 m = newmat->rmap->n; 3677 mbs = m/bs; 3678 } 3679 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3680 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3681 3682 /* process 0 needs enough room for process with most rows */ 3683 if (!rank) { 3684 mmax = rowners[1]; 3685 for (i=2; i<size; i++) { 3686 mmax = PetscMax(mmax,rowners[i]); 3687 } 3688 mmax*=bs; 3689 } else mmax = m; 3690 3691 rowners[0] = 0; 3692 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3693 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3694 rstart = rowners[rank]; 3695 rend = rowners[rank+1]; 3696 3697 /* distribute row lengths to all processors */ 3698 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3699 if (!rank) { 3700 mend = m; 3701 if (size == 1) mend = mend - extra_rows; 3702 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3703 for (j=mend; j<m; j++) locrowlens[j] = 1; 3704 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3705 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3706 ierr = 
PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3707 for (j=0; j<m; j++) { 3708 procsnz[0] += locrowlens[j]; 3709 } 3710 for (i=1; i<size; i++) { 3711 mend = browners[i+1] - browners[i]; 3712 if (i == size-1) mend = mend - extra_rows; 3713 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3714 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3715 /* calculate the number of nonzeros on each processor */ 3716 for (j=0; j<browners[i+1]-browners[i]; j++) { 3717 procsnz[i] += rowlengths[j]; 3718 } 3719 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3720 } 3721 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3722 } else { 3723 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3724 } 3725 3726 if (!rank) { 3727 /* determine max buffer needed and allocate it */ 3728 maxnz = procsnz[0]; 3729 for (i=1; i<size; i++) { 3730 maxnz = PetscMax(maxnz,procsnz[i]); 3731 } 3732 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3733 3734 /* read in my part of the matrix column indices */ 3735 nz = procsnz[0]; 3736 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3737 mycols = ibuf; 3738 if (size == 1) nz -= extra_rows; 3739 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3740 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3741 3742 /* read in every ones (except the last) and ship off */ 3743 for (i=1; i<size-1; i++) { 3744 nz = procsnz[i]; 3745 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3746 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3747 } 3748 /* read in the stuff for the last proc */ 3749 if (size != 1) { 3750 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3751 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3752 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3753 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3754 } 3755 ierr = PetscFree(cols);CHKERRQ(ierr); 3756 } else { 3757 /* determine buffer space needed for message */ 3758 nz = 0; 3759 for (i=0; i<m; i++) { 3760 nz += locrowlens[i]; 3761 } 3762 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3763 mycols = ibuf; 3764 /* receive message of column indices*/ 3765 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3766 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3767 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3768 } 3769 3770 /* loop over local rows, determining number of off diagonal entries */ 3771 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3772 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3773 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3774 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3775 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3776 rowcount = 0; nzcount = 0; 3777 for (i=0; i<mbs; i++) { 3778 dcount = 0; 3779 odcount = 0; 3780 for (j=0; j<bs; j++) { 3781 kmax = locrowlens[rowcount]; 3782 for (k=0; k<kmax; k++) { 3783 tmp = mycols[nzcount++]/bs; 3784 if (!mask[tmp]) { 3785 mask[tmp] = 1; 3786 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3787 else masked1[dcount++] = tmp; 3788 } 3789 } 3790 rowcount++; 3791 } 3792 3793 dlens[i] = dcount; 3794 odlens[i] = odcount; 3795 3796 /* zero out the 
mask elements we set */ 3797 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3798 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3799 } 3800 3801 3802 if (!sizesset) { 3803 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3804 } 3805 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3806 3807 if (!rank) { 3808 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3809 /* read in my part of the matrix numerical values */ 3810 nz = procsnz[0]; 3811 vals = buf; 3812 mycols = ibuf; 3813 if (size == 1) nz -= extra_rows; 3814 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3815 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3816 3817 /* insert into matrix */ 3818 jj = rstart*bs; 3819 for (i=0; i<m; i++) { 3820 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3821 mycols += locrowlens[i]; 3822 vals += locrowlens[i]; 3823 jj++; 3824 } 3825 /* read in other processors (except the last one) and ship out */ 3826 for (i=1; i<size-1; i++) { 3827 nz = procsnz[i]; 3828 vals = buf; 3829 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3830 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3831 } 3832 /* the last proc */ 3833 if (size != 1){ 3834 nz = procsnz[i] - extra_rows; 3835 vals = buf; 3836 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3837 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3838 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3839 } 3840 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3841 } else { 3842 /* receive numeric values */ 3843 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3844 3845 /* receive message of values*/ 3846 vals = buf; 3847 mycols = ibuf; 3848 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3849 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3850 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3851 3852 /* insert into matrix */ 3853 jj = rstart*bs; 3854 for (i=0; i<m; i++) { 3855 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3856 mycols += locrowlens[i]; 3857 vals += locrowlens[i]; 3858 jj++; 3859 } 3860 } 3861 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3862 ierr = PetscFree(buf);CHKERRQ(ierr); 3863 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3864 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3865 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3866 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3867 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 3870 PetscFunctionReturn(0); 3871 } 3872 3873 #undef __FUNCT__ 3874 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3875 /*@ 3876 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3877 3878 Input Parameters: 3879 . mat - the matrix 3880 . 
fact - factor 3881 3882 Not Collective, each process can use a different factor 3883 3884 Level: advanced 3885 3886 Notes: 3887 This can also be set by the command line option: -mat_use_hash_table <fact> 3888 3889 .keywords: matrix, hashtable, factor, HT 3890 3891 .seealso: MatSetOption() 3892 @*/ 3893 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3894 { 3895 PetscErrorCode ierr; 3896 3897 PetscFunctionBegin; 3898 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3899 PetscFunctionReturn(0); 3900 } 3901 3902 EXTERN_C_BEGIN 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3905 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3906 { 3907 Mat_MPIBAIJ *baij; 3908 3909 PetscFunctionBegin; 3910 baij = (Mat_MPIBAIJ*)mat->data; 3911 baij->ht_fact = fact; 3912 PetscFunctionReturn(0); 3913 } 3914 EXTERN_C_END 3915 3916 #undef __FUNCT__ 3917 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3918 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3919 { 3920 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3921 PetscFunctionBegin; 3922 *Ad = a->A; 3923 *Ao = a->B; 3924 *colmap = a->garray; 3925 PetscFunctionReturn(0); 3926 } 3927 3928 /* 3929 Special version for direct calls from Fortran (to eliminate two function call overheads) 3930 */ 3931 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3932 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3933 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3934 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3935 #endif 3936 3937 #undef __FUNCT__ 3938 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3939 /*@C 3940 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3941 3942 Collective on Mat 3943 3944 Input Parameters: 3945 + mat - the matrix 3946 . min - number of input rows 3947 . im - input rows 3948 . nin - number of input columns 3949 . in - input columns 3950 . v - numerical values input 3951 - addvin - INSERT_VALUES or ADD_VALUES 3952 3953 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3954 3955 Level: advanced 3956 3957 .seealso: MatSetValuesBlocked() 3958 @*/ 3959 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3960 { 3961 /* convert input arguments to C version */ 3962 Mat mat = *matin; 3963 PetscInt m = *min, n = *nin; 3964 InsertMode addv = *addvin; 3965 3966 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3967 const MatScalar *value; 3968 MatScalar *barray=baij->barray; 3969 PetscBool roworiented = baij->roworiented; 3970 PetscErrorCode ierr; 3971 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3972 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3973 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3974 3975 PetscFunctionBegin; 3976 /* tasks normally handled by MatSetValuesBlocked() */ 3977 if (mat->insertmode == NOT_SET_VALUES) { 3978 mat->insertmode = addv; 3979 } 3980 #if defined(PETSC_USE_DEBUG) 3981 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3982 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3983 #endif 3984 if (mat->assembled) { 3985 mat->was_assembled = PETSC_TRUE; 3986 mat->assembled = PETSC_FALSE; 3987 } 3988 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3989 3990 3991 if(!barray) { 3992 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3993 baij->barray = barray; 3994 } 3995 3996 if (roworiented) { 3997 stepval = (n-1)*bs; 3998 } else { 3999 stepval = (m-1)*bs; 4000 } 4001 for (i=0; i<m; i++) { 4002 if (im[i] < 0) continue; 4003 #if defined(PETSC_USE_DEBUG) 4004 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4005 #endif 4006 if (im[i] >= rstart && im[i] < rend) { 4007 row = im[i] - rstart; 4008 for (j=0; j<n; j++) { 4009 /* If NumCol = 1 then a copy is not required */ 4010 if ((roworiented) && (n == 1)) { 4011 barray = (MatScalar*)v + i*bs2; 4012 } else if((!roworiented) && (m == 1)) { 4013 barray = (MatScalar*)v + j*bs2; 4014 } else { /* Here a copy is required */ 4015 if (roworiented) { 4016 value = v + i*(stepval+bs)*bs + j*bs; 4017 } else { 4018 value = v + j*(stepval+bs)*bs + i*bs; 4019 } 4020 for (ii=0; ii<bs; ii++,value+=stepval) { 4021 for (jj=0; jj<bs; jj++) { 4022 *barray++ = *value++; 4023 } 4024 } 4025 barray -=bs2; 4026 } 4027 4028 if (in[j] >= cstart && in[j] < cend){ 4029 col = in[j] - cstart; 4030 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4031 } 4032 else if (in[j] < 0) continue; 4033 #if defined(PETSC_USE_DEBUG) 4034 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4035 #endif 4036 else { 4037 if (mat->was_assembled) { 4038 if (!baij->colmap) { 4039 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4040 } 4041 4042 #if defined(PETSC_USE_DEBUG) 4043 #if defined (PETSC_USE_CTABLE) 4044 { PetscInt data; 4045 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4046 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4047 } 4048 #else 4049 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4050 #endif 4051 #endif 4052 #if defined (PETSC_USE_CTABLE) 4053 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4054 col = (col - 1)/bs; 4055 #else 
4056 col = (baij->colmap[in[j]] - 1)/bs; 4057 #endif 4058 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4059 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4060 col = in[j]; 4061 } 4062 } 4063 else col = in[j]; 4064 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4065 } 4066 } 4067 } else { 4068 if (!baij->donotstash) { 4069 if (roworiented) { 4070 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4071 } else { 4072 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4073 } 4074 } 4075 } 4076 } 4077 4078 /* task normally handled by MatSetValuesBlocked() */ 4079 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4080 PetscFunctionReturn(0); 4081 } 4082 4083 #undef __FUNCT__ 4084 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4085 /*@ 4086 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard 4087 CSR format. 4088 4089 Collective on MPI_Comm 4090 4091 Input Parameters: 4092 + comm - MPI communicator 4093 . bs - the block size, only a block size of 1 is supported 4094 . m - number of local rows (Cannot be PETSC_DECIDE) 4095 . n - number of local columns; this value should be the same as the local size used in creating the 4096 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 4097 it calculated if N is given). For square matrices n is almost always m. 4098 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4099 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4100 . i - the indices into j for the start of each local row (starts with zero) 4101 . j - the column indices for each local row (sorted within each row) 4102 - a - matrix values 4103 4104 Output Parameter: 4105 . mat - the matrix 4106 4107 Level: intermediate 4108 4109 Notes: 4110 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4111 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4112 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4113 4114 The i and j indices are 0 based, and the i array holds offsets into the local j array. 4115 4116 .keywords: matrix, aij, compressed row, sparse, parallel 4117 4118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4119 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4120 @*/ 4121 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4122 { 4123 PetscErrorCode ierr; 4124 4125 4126 PetscFunctionBegin; 4127 if (i[0]) { 4128 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4129 } 4130 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4131 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4132 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4133 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4134 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4135 PetscFunctionReturn(0); 4136 } 4137
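
/*
   Illustrative sketch (not part of the library source): assembling a small matrix with
   MatCreateMPIBAIJWithArrays() on one process. The CSR arrays below are made-up example data;
   bs must be 1 for this routine.

      PetscInt    i[] = {0,2,4,6,8};            row offsets: one entry per local row, plus one
      PetscInt    j[] = {0,1,0,1,2,3,2,3};      column indices, sorted within each row
      PetscScalar a[] = {4,-1,-1,4,4,-1,-1,4};  values corresponding to j[]
      Mat         A;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,4,4,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/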