#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
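
     For illustration only (hypothetical layout): with bs = 2, Nbs = 4 and
   garray[] = {1,3}, the array version of colmap built below is {0,1,0,3};
   a global block column c has local off-diagonal storage iff colmap[c] is
   nonzero, and colmap[c]-1 is then the first local point column of that
   block within B.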
87 */ 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 91 { 92 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 93 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 94 PetscErrorCode ierr; 95 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_CTABLE) 99 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 100 for (i=0; i<nbs; i++) { 101 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 102 } 103 #else 104 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 105 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 106 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 107 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 108 #endif 109 PetscFunctionReturn(0); 110 } 111 112 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 113 { \ 114 \ 115 brow = row/bs; \ 116 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 117 rmax = aimax[brow]; nrow = ailen[brow]; \ 118 bcol = col/bs; \ 119 ridx = row % bs; cidx = col % bs; \ 120 low = 0; high = nrow; \ 121 while (high-low > 3) { \ 122 t = (low+high)/2; \ 123 if (rp[t] > bcol) high = t; \ 124 else low = t; \ 125 } \ 126 for (_i=low; _i<high; _i++) { \ 127 if (rp[_i] > bcol) break; \ 128 if (rp[_i] == bcol) { \ 129 bap = ap + bs2*_i + bs*cidx + ridx; \ 130 if (addv == ADD_VALUES) *bap += value; \ 131 else *bap = value; \ 132 goto a_noinsert; \ 133 } \ 134 } \ 135 if (a->nonew == 1) goto a_noinsert; \ 136 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 137 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 138 N = nrow++ - 1; \ 139 /* shift up all the later entries in this row */ \ 140 for (ii=N; ii>=_i; ii--) { \ 141 rp[ii+1] = rp[ii]; \ 142 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 143 } \ 144 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 145 rp[_i] = bcol; \ 146 ap[bs2*_i + bs*cidx + ridx] = value; \ 147 a_noinsert:; \ 148 ailen[brow] = nrow; \ 149 } 150 151 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 152 { \ 153 brow = row/bs; \ 154 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 155 rmax = bimax[brow]; nrow = bilen[brow]; \ 156 bcol = col/bs; \ 157 ridx = row % bs; cidx = col % bs; \ 158 low = 0; high = nrow; \ 159 while (high-low > 3) { \ 160 t = (low+high)/2; \ 161 if (rp[t] > bcol) high = t; \ 162 else low = t; \ 163 } \ 164 for (_i=low; _i<high; _i++) { \ 165 if (rp[_i] > bcol) break; \ 166 if (rp[_i] == bcol) { \ 167 bap = ap + bs2*_i + bs*cidx + ridx; \ 168 if (addv == ADD_VALUES) *bap += value; \ 169 else *bap = value; \ 170 goto b_noinsert; \ 171 } \ 172 } \ 173 if (b->nonew == 1) goto b_noinsert; \ 174 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 175 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 176 CHKMEMQ; \ 177 N = nrow++ - 1; \ 178 /* shift up all the later entries in this row */ \ 179 for (ii=N; ii>=_i; ii--) { \ 180 rp[ii+1] = rp[ii]; \ 181 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 182 } \ 183 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 184 rp[_i] = bcol; \ 185 ap[bs2*_i + bs*cidx + ridx] = value; \ 186 b_noinsert:; \ 187 bilen[brow] = nrow; \ 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatSetValues_MPIBAIJ" 192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 193 { 194 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 195 MatScalar value; 196 PetscBool roworiented = baij->roworiented; 197 PetscErrorCode ierr; 198 PetscInt i,j,row,col; 199 PetscInt rstart_orig=mat->rmap->rstart; 200 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 201 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 202 203 /* Some Variables required in the macro */ 204 Mat A = baij->A; 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 206 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 207 MatScalar *aa =a->a; 208 209 Mat B = baij->B; 210 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 211 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 212 MatScalar *ba =b->a; 213 214 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 215 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 216 MatScalar *ap,*bap; 217 218 PetscFunctionBegin; 219 if (v) PetscValidScalarPointer(v,6); 220 for (i=0; i<m; i++) { 221 if (im[i] < 0) continue; 222 #if defined(PETSC_USE_DEBUG) 223 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 224 #endif 225 if (im[i] >= rstart_orig && im[i] < rend_orig) { 226 row = im[i] - rstart_orig; 227 for (j=0; j<n; j++) { 228 if (in[j] >= cstart_orig && in[j] < cend_orig) { 229 col = in[j] - cstart_orig; 230 if (roworiented) value = v[i*n+j]; 231 else value = v[i+j*m]; 232 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 233 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 234 } else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_DEBUG) 236 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!baij->colmap) { 241 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined(PETSC_USE_CTABLE) 244 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 245 col = col - 1; 246 #else 247 col = baij->colmap[in[j]/bs] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 250 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 253 B = baij->B; 254 b = (Mat_SeqBAIJ*)(B)->data; 255 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 256 ba =b->a; 257 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 258 else col += in[j]%bs; 259 } else col = in[j]; 260 if (roworiented) value = v[i*n+j]; 261 else value = v[i+j*m]; 262 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 263 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 264 } 265 } 266 } else { 267 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 268 if (!baij->donotstash) { 269 mat->assembled = PETSC_FALSE; 270 if (roworiented) { 271 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 272 } else { 273 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 274 } 275 } 276 } 277 } 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 284 { 285 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 286 const PetscScalar *value; 287 MatScalar *barray = baij->barray; 288 PetscBool roworiented = baij->roworiented; 289 PetscErrorCode ierr; 290 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 291 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 292 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 293 294 PetscFunctionBegin; 295 if (!barray) { 296 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 297 baij->barray = barray; 298 } 299 300 if (roworiented) stepval = (n-1)*bs; 301 else stepval = (m-1)*bs; 302 303 for (i=0; i<m; i++) { 304 if (im[i] < 0) continue; 305 #if defined(PETSC_USE_DEBUG) 306 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 307 #endif 308 if (im[i] >= rstart && im[i] < rend) { 309 row = im[i] - rstart; 310 for (j=0; j<n; j++) { 311 /* If NumCol = 1 then a copy is not required */ 312 if ((roworiented) && (n == 1)) { 313 barray = (MatScalar*)v + i*bs2; 314 } else if ((!roworiented) && (m == 1)) { 315 barray = (MatScalar*)v + j*bs2; 316 } else { /* Here a copy is required */ 317 if (roworiented) { 318 value = v + (i*(stepval+bs) + j)*bs; 319 } else { 320 value = v + (j*(stepval+bs) + i)*bs; 321 } 322 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 323 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 324 barray += bs; 325 } 326 barray -= bs2; 327 } 328 329 if (in[j] >= cstart && in[j] < cend) { 330 col = in[j] - cstart; 331 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 332 } else if (in[j] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 335 #endif 336 else { 337 if (mat->was_assembled) { 338 if (!baij->colmap) { 339 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 340 } 341 342 #if defined(PETSC_USE_DEBUG) 343 #if defined(PETSC_USE_CTABLE) 344 { PetscInt data; 345 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 346 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 } 348 #else 349 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 350 #endif 351 #endif 352 #if defined(PETSC_USE_CTABLE) 353 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 354 col = (col - 1)/bs; 355 #else 356 col = (baij->colmap[in[j]] - 1)/bs; 357 #endif 358 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 359 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360 col = in[j]; 361 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 362 } else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though 
MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 368 if (!baij->donotstash) { 369 if (roworiented) { 370 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 371 } else { 372 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 373 } 374 } 375 } 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define HASH_KEY 0.6180339887 381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 387 { 388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 389 PetscBool roworiented = baij->roworiented; 390 PetscErrorCode ierr; 391 PetscInt i,j,row,col; 392 PetscInt rstart_orig=mat->rmap->rstart; 393 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 394 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 395 PetscReal tmp; 396 MatScalar **HD = baij->hd,value; 397 #if defined(PETSC_USE_DEBUG) 398 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 399 #endif 400 401 PetscFunctionBegin; 402 if (v) PetscValidScalarPointer(v,6); 403 for (i=0; i<m; i++) { 404 #if defined(PETSC_USE_DEBUG) 405 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 406 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 407 #endif 408 row = im[i]; 409 if (row >= rstart_orig && row < rend_orig) { 410 for (j=0; j<n; j++) { 411 col = in[j]; 412 if (roworiented) value = v[i*n+j]; 413 else value = v[i+j*m]; 414 /* Look up PetscInto the Hash Table */ 415 key = (row/bs)*Nbs+(col/bs)+1; 416 h1 = HASH(size,key,tmp); 417 418 419 idx = h1; 420 #if defined(PETSC_USE_DEBUG) 421 insert_ct++; 422 total_ct++; 423 if (HT[idx] != key) { 424 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 425 if (idx == size) { 426 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 427 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 435 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else if (!baij->donotstash) { 444 if (roworiented) { 445 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 446 } else { 447 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 448 } 449 } 450 } 451 #if defined(PETSC_USE_DEBUG) 452 baij->ht_total_ct = total_ct; 453 baij->ht_insert_ct = insert_ct; 454 #endif 455 PetscFunctionReturn(0); 456 } 457 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt 
im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 PetscBool roworiented = baij->roworiented; 464 PetscErrorCode ierr; 465 PetscInt i,j,ii,jj,row,col; 466 PetscInt rstart=baij->rstartbs; 467 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 468 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 469 PetscReal tmp; 470 MatScalar **HD = baij->hd,*baij_a; 471 const PetscScalar *v_t,*value; 472 #if defined(PETSC_USE_DEBUG) 473 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 474 #endif 475 476 PetscFunctionBegin; 477 if (roworiented) stepval = (n-1)*bs; 478 else stepval = (m-1)*bs; 479 480 for (i=0; i<m; i++) { 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 483 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 484 #endif 485 row = im[i]; 486 v_t = v + i*nbs2; 487 if (row >= rstart && row < rend) { 488 for (j=0; j<n; j++) { 489 col = in[j]; 490 491 /* Look up into the Hash Table */ 492 key = row*Nbs+col+1; 493 h1 = HASH(size,key,tmp); 494 495 idx = h1; 496 #if defined(PETSC_USE_DEBUG) 497 total_ct++; 498 insert_ct++; 499 if (HT[idx] != key) { 500 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 501 if (idx == size) { 502 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 503 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 504 } 505 } 506 #else 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 511 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 #endif 515 baij_a = HD[idx]; 516 if (roworiented) { 517 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 518 /* value = v + (i*(stepval+bs)+j)*bs; */ 519 value = v_t; 520 v_t += bs; 521 if (addv == ADD_VALUES) { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] += *value++; 525 } 526 } 527 } else { 528 for (ii=0; ii<bs; ii++,value+=stepval) { 529 for (jj=ii; jj<bs2; jj+=bs) { 530 baij_a[jj] = *value++; 531 } 532 } 533 } 534 } else { 535 value = v + j*(stepval+bs)*bs + i*bs; 536 if (addv == ADD_VALUES) { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] += *value++; 540 } 541 } 542 } else { 543 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 544 for (jj=0; jj<bs; jj++) { 545 baij_a[jj] = *value++; 546 } 547 } 548 } 549 } 550 } 551 } else { 552 if (!baij->donotstash) { 553 if (roworiented) { 554 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 555 } else { 556 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 #if defined(PETSC_USE_DEBUG) 562 baij->ht_total_ct = total_ct; 563 baij->ht_insert_ct = insert_ct; 564 #endif 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatGetValues_MPIBAIJ" 570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 571 { 572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 573 PetscErrorCode ierr; 574 PetscInt bs = mat->rmap->bs,i,j,bsrstart = 
mat->rmap->rstart,bsrend = mat->rmap->rend; 575 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 576 577 PetscFunctionBegin; 578 for (i=0; i<m; i++) { 579 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 580 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 581 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 582 row = idxm[i] - bsrstart; 583 for (j=0; j<n; j++) { 584 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 585 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 586 if (idxn[j] >= bscstart && idxn[j] < bscend) { 587 col = idxn[j] - bscstart; 588 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589 } else { 590 if (!baij->colmap) { 591 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 592 } 593 #if defined(PETSC_USE_CTABLE) 594 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 595 data--; 596 #else 597 data = baij->colmap[idxn[j]/bs]-1; 598 #endif 599 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 600 else { 601 col = data + idxn[j]%bs; 602 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } 604 } 605 } 606 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatNorm_MPIBAIJ" 613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 614 { 615 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 616 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 617 PetscErrorCode ierr; 618 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 619 PetscReal sum = 0.0; 620 MatScalar *v; 621 622 PetscFunctionBegin; 623 if (baij->size == 1) { 624 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 625 } else { 626 if (type == NORM_FROBENIUS) { 627 v = amat->a; 628 nz = amat->nz*bs2; 629 for (i=0; i<nz; i++) { 630 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 631 } 632 v = bmat->a; 633 nz = bmat->nz*bs2; 634 for (i=0; i<nz; i++) { 635 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 636 } 637 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 638 *nrm = PetscSqrtReal(*nrm); 639 } else if (type == NORM_1) { /* max column sum */ 640 PetscReal *tmp,*tmp2; 641 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 642 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 643 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 644 v = amat->a; jj = amat->j; 645 for (i=0; i<amat->nz; i++) { 646 for (j=0; j<bs; j++) { 647 col = bs*(cstart + *jj) + j; /* column index */ 648 for (row=0; row<bs; row++) { 649 tmp[col] += PetscAbsScalar(*v); v++; 650 } 651 } 652 jj++; 653 } 654 v = bmat->a; jj = bmat->j; 655 for (i=0; i<bmat->nz; i++) { 656 for (j=0; j<bs; j++) { 657 col = bs*garray[*jj] + j; 658 for (row=0; row<bs; row++) { 659 tmp[col] += PetscAbsScalar(*v); v++; 660 } 661 } 662 jj++; 663 } 664 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 665 *nrm = 0.0; 666 for (j=0; j<mat->cmap->N; j++) { 667 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 668 } 669 ierr = 
PetscFree2(tmp,tmp2);CHKERRQ(ierr); 670 } else if (type == NORM_INFINITY) { /* max row sum */ 671 PetscReal *sums; 672 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 673 sum = 0.0; 674 for (j=0; j<amat->mbs; j++) { 675 for (row=0; row<bs; row++) sums[row] = 0.0; 676 v = amat->a + bs2*amat->i[j]; 677 nz = amat->i[j+1]-amat->i[j]; 678 for (i=0; i<nz; i++) { 679 for (col=0; col<bs; col++) { 680 for (row=0; row<bs; row++) { 681 sums[row] += PetscAbsScalar(*v); v++; 682 } 683 } 684 } 685 v = bmat->a + bs2*bmat->i[j]; 686 nz = bmat->i[j+1]-bmat->i[j]; 687 for (i=0; i<nz; i++) { 688 for (col=0; col<bs; col++) { 689 for (row=0; row<bs; row++) { 690 sums[row] += PetscAbsScalar(*v); v++; 691 } 692 } 693 } 694 for (row=0; row<bs; row++) { 695 if (sums[row] > sum) sum = sums[row]; 696 } 697 } 698 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 699 ierr = PetscFree(sums);CHKERRQ(ierr); 700 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 701 } 702 PetscFunctionReturn(0); 703 } 704 705 /* 706 Creates the hash table, and sets the table 707 This table is created only once. 708 If new entried need to be added to the matrix 709 then the hash table has to be destroyed and 710 recreated. 711 */ 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 714 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 715 { 716 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 717 Mat A = baij->A,B=baij->B; 718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 719 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 720 PetscErrorCode ierr; 721 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 722 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 723 PetscInt *HT,key; 724 MatScalar **HD; 725 PetscReal tmp; 726 #if defined(PETSC_USE_INFO) 727 PetscInt ct=0,max=0; 728 #endif 729 730 PetscFunctionBegin; 731 if (baij->ht) PetscFunctionReturn(0); 732 733 baij->ht_size = (PetscInt)(factor*nz); 734 ht_size = baij->ht_size; 735 736 /* Allocate Memory for Hash Table */ 737 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 738 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 739 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 740 HD = baij->hd; 741 HT = baij->ht; 742 743 /* Loop Over A */ 744 for (i=0; i<a->mbs; i++) { 745 for (j=ai[i]; j<ai[i+1]; j++) { 746 row = i+rstart; 747 col = aj[j]+cstart; 748 749 key = row*Nbs + col + 1; 750 h1 = HASH(ht_size,key,tmp); 751 for (k=0; k<ht_size; k++) { 752 if (!HT[(h1+k)%ht_size]) { 753 HT[(h1+k)%ht_size] = key; 754 HD[(h1+k)%ht_size] = a->a + j*bs2; 755 break; 756 #if defined(PETSC_USE_INFO) 757 } else { 758 ct++; 759 #endif 760 } 761 } 762 #if defined(PETSC_USE_INFO) 763 if (k> max) max = k; 764 #endif 765 } 766 } 767 /* Loop Over B */ 768 for (i=0; i<b->mbs; i++) { 769 for (j=bi[i]; j<bi[i+1]; j++) { 770 row = i+rstart; 771 col = garray[bj[j]]; 772 key = row*Nbs + col + 1; 773 h1 = HASH(ht_size,key,tmp); 774 for (k=0; k<ht_size; k++) { 775 if (!HT[(h1+k)%ht_size]) { 776 HT[(h1+k)%ht_size] = key; 777 HD[(h1+k)%ht_size] = b->a + j*bs2; 778 break; 779 #if defined(PETSC_USE_INFO) 780 } else { 781 ct++; 782 #endif 783 } 784 } 785 #if defined(PETSC_USE_INFO) 786 if (k> max) max = k; 787 #endif 788 } 789 } 790 791 /* Print Summary */ 792 #if defined(PETSC_USE_INFO) 793 for (i=0,j=0; i<ht_size; i++) { 794 if 
(HT[i]) j++; 795 } 796 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 797 #endif 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 804 { 805 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 806 PetscErrorCode ierr; 807 PetscInt nstash,reallocs; 808 InsertMode addv; 809 810 PetscFunctionBegin; 811 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 812 813 /* make sure all processors are either in INSERTMODE or ADDMODE */ 814 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 815 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 816 mat->insertmode = addv; /* in case this processor had no cache */ 817 818 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 819 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 820 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 821 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 822 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 823 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 830 { 831 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 832 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 833 PetscErrorCode ierr; 834 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 835 PetscInt *row,*col; 836 PetscBool r1,r2,r3,other_disassembled; 837 MatScalar *val; 838 InsertMode addv = mat->insertmode; 839 PetscMPIInt n; 840 841 PetscFunctionBegin; 842 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 843 if (!baij->donotstash && !mat->nooffprocentries) { 844 while (1) { 845 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 846 if (!flg) break; 847 848 for (i=0; i<n;) { 849 /* Now identify the consecutive vals belonging to the same row */ 850 for (j=i,rstart=row[j]; j<n; j++) { 851 if (row[j] != rstart) break; 852 } 853 if (j < n) ncols = j-i; 854 else ncols = n-i; 855 /* Now assemble all these values with a single function call */ 856 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 857 i = j; 858 } 859 } 860 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 861 /* Now process the block-stash. 
Since the values are stashed column-oriented, 862 set the roworiented flag to column oriented, and after MatSetValues() 863 restore the original flags */ 864 r1 = baij->roworiented; 865 r2 = a->roworiented; 866 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 867 868 baij->roworiented = PETSC_FALSE; 869 a->roworiented = PETSC_FALSE; 870 871 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 872 while (1) { 873 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 874 if (!flg) break; 875 876 for (i=0; i<n;) { 877 /* Now identify the consecutive vals belonging to the same row */ 878 for (j=i,rstart=row[j]; j<n; j++) { 879 if (row[j] != rstart) break; 880 } 881 if (j < n) ncols = j-i; 882 else ncols = n-i; 883 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 884 i = j; 885 } 886 } 887 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 888 889 baij->roworiented = r1; 890 a->roworiented = r2; 891 892 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 893 } 894 895 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 897 898 /* determine if any processor has disassembled, if so we must 899 also disassemble ourselfs, in order that we may reassemble. */ 900 /* 901 if nonzero structure of submatrix B cannot change then we know that 902 no processor disassembled thus we can skip this stuff 903 */ 904 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 905 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 906 if (mat->was_assembled && !other_disassembled) { 907 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 908 } 909 } 910 911 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 912 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 913 } 914 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 915 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 916 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 917 918 #if defined(PETSC_USE_INFO) 919 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 920 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 921 922 baij->ht_total_ct = 0; 923 baij->ht_insert_ct = 0; 924 } 925 #endif 926 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 927 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 928 929 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 930 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 931 } 932 933 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 934 935 baij->rowvalues = 0; 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 941 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 942 { 943 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 944 PetscErrorCode ierr; 945 PetscMPIInt size = baij->size,rank = baij->rank; 946 PetscInt bs = mat->rmap->bs; 947 PetscBool iascii,isdraw; 948 PetscViewer sviewer; 949 PetscViewerFormat format; 950 951 PetscFunctionBegin; 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 953 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 954 if (iascii) { 955 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat?
*/ 999 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1000 if (!rank) { 1001 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1002 } else { 1003 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1004 } 1005 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1006 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1007 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1008 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1009 1010 /* copy over the A part */ 1011 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1012 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1013 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1014 1015 for (i=0; i<mbs; i++) { 1016 rvals[0] = bs*(baij->rstartbs + i); 1017 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1018 for (j=ai[i]; j<ai[i+1]; j++) { 1019 col = (baij->cstartbs+aj[j])*bs; 1020 for (k=0; k<bs; k++) { 1021 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1022 col++; a += bs; 1023 } 1024 } 1025 } 1026 /* copy over the B part */ 1027 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1028 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1029 for (i=0; i<mbs; i++) { 1030 rvals[0] = bs*(baij->rstartbs + i); 1031 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1032 for (j=ai[i]; j<ai[i+1]; j++) { 1033 col = baij->garray[aj[j]]*bs; 1034 for (k=0; k<bs; k++) { 1035 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1036 col++; a += bs; 1037 } 1038 } 1039 } 1040 ierr = PetscFree(rvals);CHKERRQ(ierr); 1041 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 /* 1044 Everyone has to call to draw the matrix since the graphics waits are 1045 synchronized across all processors that share the PetscDraw object 1046 */ 1047 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1048 if (!rank) { 1049 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1050 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1051 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1052 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1053 } 1054 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1055 ierr = MatDestroy(&A);CHKERRQ(ierr); 1056 } 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1062 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1063 { 1064 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1065 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1066 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1069 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1070 int fd; 1071 PetscScalar *column_values; 1072 FILE *file; 1073 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1074 PetscInt message_count,flowcontrolcount; 1075 1076 PetscFunctionBegin; 1077 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1078 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1079 nz = bs2*(A->nz + B->nz); 1080 rlen = mat->rmap->n; 1081 if (!rank) { 1082 header[0] = MAT_FILE_CLASSID; 1083 header[1] = mat->rmap->N; 1084 
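    /* Binary header layout, as written below: header[0] = MAT_FILE_CLASSID,
       header[1] = global rows, header[2] = global columns, header[3] = total
       number of (point) nonzeros, summed over all processes via MPI_Reduce */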
header[2] = mat->cmap->N; 1085 1086 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1087 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1088 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1089 /* get largest number of rows any processor has */ 1090 range = mat->rmap->range; 1091 for (i=1; i<size; i++) { 1092 rlen = PetscMax(rlen,range[i+1] - range[i]); 1093 } 1094 } else { 1095 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1096 } 1097 1098 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1099 /* compute lengths of each row */ 1100 for (i=0; i<a->mbs; i++) { 1101 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1102 } 1103 /* store the row lengths to the file */ 1104 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1105 if (!rank) { 1106 MPI_Status status; 1107 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1108 rlen = (range[1] - range[0])/bs; 1109 for (i=0; i<rlen; i++) { 1110 for (j=0; j<bs; j++) { 1111 row_lens[i*bs+j] = bs*crow_lens[i]; 1112 } 1113 } 1114 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1115 for (i=1; i<size; i++) { 1116 rlen = (range[i+1] - range[i])/bs; 1117 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1118 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1119 for (k=0; k<rlen; k++) { 1120 for (j=0; j<bs; j++) { 1121 row_lens[k*bs+j] = bs*crow_lens[k]; 1122 } 1123 } 1124 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1125 } 1126 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1127 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1128 } else { 1129 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1130 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1131 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1132 } 1133 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1134 1135 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1136 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1137 the communication needed for the nonzero values */ 1138 nzmax = nz; /* space a largest processor needs */ 1139 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1140 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1141 cnt = 0; 1142 for (i=0; i<a->mbs; i++) { 1143 pcnt = cnt; 1144 for (j=B->i[i]; j<B->i[i+1]; j++) { 1145 if ((col = garray[B->j[j]]) > cstart) break; 1146 for (l=0; l<bs; l++) { 1147 column_indices[cnt++] = bs*col+l; 1148 } 1149 } 1150 for (k=A->i[i]; k<A->i[i+1]; k++) { 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1153 } 1154 } 1155 for (; j<B->i[i+1]; j++) { 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1158 } 1159 } 1160 len = cnt - pcnt; 1161 for (k=1; k<bs; k++) { 1162 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1163 cnt += len; 1164 } 1165 } 1166 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1167 1168 /* store the columns to the file */ 1169 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1170 if (!rank) { 1171 MPI_Status status; 1172 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1173 for (i=1; i<size; i++) { 1174 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1175 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1176 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1177 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1178 } 1179 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1182 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1183 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1184 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1185 } 1186 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1187 1188 /* load up the numerical values */ 1189 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<a->mbs; i++) { 1192 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1193 for (j=B->i[i]; j<B->i[i+1]; j++) { 1194 if (garray[B->j[j]] > cstart) break; 1195 for (l=0; l<bs; l++) { 1196 for (ll=0; ll<bs; ll++) { 1197 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1198 } 1199 } 1200 cnt += bs; 1201 } 1202 for (k=A->i[i]; k<A->i[i+1]; k++) { 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (; j<B->i[i+1]; j++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 cnt += (bs-1)*rlen; 1219 } 1220 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1221 1222 /* store the column values to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 MPI_Status status; 1226 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1227 for (i=1; i<size; i++) { 1228 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1229 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1230 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1231 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1232 } 1233 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1234 } else { 1235 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1236 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1237 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1238 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1239 } 1240 ierr = PetscFree(column_values);CHKERRQ(ierr); 1241 1242 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1243 if (file) { 1244 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatView_MPIBAIJ" 1251 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1252 { 1253 PetscErrorCode ierr; 1254 PetscBool iascii,isdraw,issocket,isbinary; 1255 1256 PetscFunctionBegin; 1257 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1258 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1259 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1260 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1261 if (iascii || isdraw || issocket) { 1262 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1263 } else if (isbinary) { 1264 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1265 } 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1271 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1272 { 1273 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1282 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1294 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1295 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* values actually were received in the Begin() but we need to call this nop */ 1372 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1380 { 1381 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1382 PetscErrorCode ierr; 1383 1384 PetscFunctionBegin; 1385 /* do nondiagonal part */ 1386 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1387 /* send it on its way */ 1388 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 /* do local part */ 1390 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1391 /* receive remote parts: note this assumes the values are not actually */ 1392 /* inserted in yy until the next line, which is true for my implementation*/ 1393 /* but is not perhaps always true. */ 1394 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 /* 1399 This only works correctly for square matrices where the subblock A->A is the 1400 diagonal block 1401 */ 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1411 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatScale_MPIBAIJ" 1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1418 { 1419 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1424 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1431 { 1432 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1433 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1434 PetscErrorCode ierr; 1435 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1436 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1437 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1438 1439 PetscFunctionBegin; 1440 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1441 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1442 mat->getrowactive = PETSC_TRUE; 1443 1444 if (!mat->rowvalues && (idx || v)) { 1445 /* 1446 allocate enough space to hold information from the longest row. 
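       (A rough sketch of the sizing used just below: max over block rows of
       nnzA(i)+nnzB(i) blocks, i.e. max*bs2 scalars for rowvalues and
       max*bs2 integers for rowindices.)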
1447 */ 1448 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1449 PetscInt max = 1,mbs = mat->mbs,tmp; 1450 for (i=0; i<mbs; i++) { 1451 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1452 if (max < tmp) max = tmp; 1453 } 1454 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1455 } 1456 lrow = row - brstart; 1457 1458 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1459 if (!v) {pvA = 0; pvB = 0;} 1460 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1461 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1462 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1463 nztot = nzA + nzB; 1464 1465 cmap = mat->garray; 1466 if (v || idx) { 1467 if (nztot) { 1468 /* Sort by increasing column numbers, assuming A and B already sorted */ 1469 PetscInt imark = -1; 1470 if (v) { 1471 *v = v_p = mat->rowvalues; 1472 for (i=0; i<nzB; i++) { 1473 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1474 else break; 1475 } 1476 imark = i; 1477 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1478 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1479 } 1480 if (idx) { 1481 *idx = idx_p = mat->rowindices; 1482 if (imark > -1) { 1483 for (i=0; i<imark; i++) { 1484 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1485 } 1486 } else { 1487 for (i=0; i<nzB; i++) { 1488 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1489 else break; 1490 } 1491 imark = i; 1492 } 1493 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1494 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1495 } 1496 } else { 1497 if (idx) *idx = 0; 1498 if (v) *v = 0; 1499 } 1500 } 1501 *nz = nztot; 1502 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1503 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1512 1513 PetscFunctionBegin; 1514 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1515 baij->getrowactive = PETSC_FALSE; 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1522 { 1523 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1528 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1535 { 1536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1537 Mat A = a->A,B = a->B; 1538 PetscErrorCode ierr; 1539 PetscReal isend[5],irecv[5]; 1540 1541 PetscFunctionBegin; 1542 info->block_size = (PetscReal)matin->rmap->bs; 1543 1544 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1545 1546 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1547 isend[3] = info->memory; isend[4] = info->mallocs; 1548 1549 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1550 1551 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1552 
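  /* isend[] accumulates {nz_used, nz_allocated, nz_unneeded, memory, mallocs}
     over the diagonal (A) and off-diagonal (B) blocks before any reduction */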
isend[3] += info->memory; isend[4] += info->mallocs; 1553 1554 if (flag == MAT_LOCAL) { 1555 info->nz_used = isend[0]; 1556 info->nz_allocated = isend[1]; 1557 info->nz_unneeded = isend[2]; 1558 info->memory = isend[3]; 1559 info->mallocs = isend[4]; 1560 } else if (flag == MAT_GLOBAL_MAX) { 1561 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1562 1563 info->nz_used = irecv[0]; 1564 info->nz_allocated = irecv[1]; 1565 info->nz_unneeded = irecv[2]; 1566 info->memory = irecv[3]; 1567 info->mallocs = irecv[4]; 1568 } else if (flag == MAT_GLOBAL_SUM) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1570 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1577 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1578 info->fill_ratio_needed = 0; 1579 info->factor_mallocs = 0; 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1586 { 1587 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_NEW_NONZERO_LOCATIONS: 1593 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1594 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 case MAT_NEW_NONZERO_LOCATION_ERR: 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_ROW_ORIENTED: 1601 a->roworiented = flg; 1602 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_NEW_DIAGONALS: 1607 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1608 break; 1609 case MAT_IGNORE_OFF_PROC_ENTRIES: 1610 a->donotstash = flg; 1611 break; 1612 case MAT_USE_HASH_TABLE: 1613 a->ht_flag = flg; 1614 break; 1615 case MAT_SYMMETRIC: 1616 case MAT_STRUCTURALLY_SYMMETRIC: 1617 case MAT_HERMITIAN: 1618 case MAT_SYMMETRY_ETERNAL: 1619 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1620 break; 1621 default: 1622 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1632 Mat_SeqBAIJ *Aloc; 1633 Mat B; 1634 PetscErrorCode ierr; 1635 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1636 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1637 MatScalar *a; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1642 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1643 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1644 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1645 /* Do not know preallocation information, but must set block size */ 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1647 } else { 1648 B = 
*matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 /* copy over the B part */ 1669 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1670 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1671 for (i=0; i<mbs; i++) { 1672 rvals[0] = bs*(baij->rstartbs + i); 1673 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1674 for (j=ai[i]; j<ai[i+1]; j++) { 1675 col = baij->garray[aj[j]]*bs; 1676 for (k=0; k<bs; k++) { 1677 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1678 col++; 1679 a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1688 else { 1689 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1697 { 1698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1699 Mat a = baij->A,b = baij->B; 1700 PetscErrorCode ierr; 1701 PetscInt s1,s2,s3; 1702 1703 PetscFunctionBegin; 1704 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1705 if (rr) { 1706 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1707 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1708 /* Overlap communication with computation. 
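
         The sequence below starts the scatter of rr into baij->lvec, performs the left
         scaling and the scaling of the diagonal block while the messages are in flight,
         and only then completes the scatter and right-scales the off-diagonal block.
         A generic sketch of this overlap idiom (ctx, x and xlocal are assumed names):

           ierr = VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
           ... computation that does not need xlocal ...
           ierr = VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
           ... computation that needs xlocal ...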
*/ 1709 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 } 1711 if (ll) { 1712 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1713 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1714 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1715 } 1716 /* scale the diagonal block */ 1717 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1718 1719 if (rr) { 1720 /* Do a scatter end and then right scale the off-diagonal block */ 1721 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1732 PetscErrorCode ierr; 1733 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1734 PetscInt i,*owners = A->rmap->range; 1735 PetscInt *nprocs,j,idx,nsends,row; 1736 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1737 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1738 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1739 MPI_Comm comm = ((PetscObject)A)->comm; 1740 MPI_Request *send_waits,*recv_waits; 1741 MPI_Status recv_status,*send_status; 1742 const PetscScalar *xx; 1743 PetscScalar *bb; 1744 #if defined(PETSC_DEBUG) 1745 PetscBool found = PETSC_FALSE; 1746 #endif 1747 1748 PetscFunctionBegin; 1749 /* first count number of contributors to each processor */ 1750 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1751 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1753 j = 0; 1754 for (i=0; i<N; i++) { 1755 if (lastidx > (idx = rows[i])) j = 0; 1756 lastidx = idx; 1757 for (; j<size; j++) { 1758 if (idx >= owners[j] && idx < owners[j+1]) { 1759 nprocs[2*j]++; 1760 nprocs[2*j+1] = 1; 1761 owner[i] = j; 1762 #if defined(PETSC_DEBUG) 1763 found = PETSC_TRUE; 1764 #endif 1765 break; 1766 } 1767 } 1768 #if defined(PETSC_DEBUG) 1769 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1770 found = PETSC_FALSE; 1771 #endif 1772 } 1773 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 1774 1775 if (A->nooffproczerorows) { 1776 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1777 nrecvs = nsends; 1778 nmax = N; 1779 } else { 1780 /* inform other processors of number of messages and max length*/ 1781 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1782 } 1783 1784 /* post receives: */ 1785 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1786 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1787 for (i=0; i<nrecvs; i++) { 1788 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1789 } 1790 1791 /* do sends: 1792 1) starts[i] gives the starting index in svalues for stuff going to 1793 the ith processor 1794 */ 1795 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1796 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1797 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1798 starts[0] = 0; 1799 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1800 for (i=0; i<N; i++) { 1801 svalues[starts[owner[i]]++] = rows[i]; 1802 } 1803 1804 starts[0] = 0; 1805 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 1806 count = 0; 1807 for (i=0; i<size; i++) { 1808 if (nprocs[2*i+1]) { 1809 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1810 } 1811 } 1812 ierr = PetscFree(starts);CHKERRQ(ierr); 1813 1814 base = owners[rank]; 1815 1816 /* wait on receives */ 1817 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1818 count = nrecvs; 1819 slen = 0; 1820 while (count) { 1821 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1822 /* unpack receives into our local space */ 1823 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1824 1825 source[imdex] = recv_status.MPI_SOURCE; 1826 lens[imdex] = n; 1827 slen += n; 1828 count--; 1829 } 1830 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1831 1832 /* move the data into the send scatter */ 1833 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1834 count = 0; 1835 for (i=0; i<nrecvs; i++) { 1836 values = rvalues + i*nmax; 1837 for (j=0; j<lens[i]; j++) { 1838 lrows[count++] = values[j] - base; 1839 } 1840 } 1841 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1842 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1843 ierr = PetscFree(owner);CHKERRQ(ierr); 1844 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1845 1846 /* fix right hand side if needed */ 1847 if (x && b) { 1848 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1849 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1850 for (i=0; i<slen; i++) { 1851 bb[lrows[i]] = diag*xx[lrows[i]]; 1852 } 1853 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1855 } 1856 1857 /* actually zap the local rows */ 1858 /* 1859 Zero the required rows. If the "diagonal block" of the matrix 1860 is square and the user wishes to set the diagonal we use separate 1861 code so that MatSetValues() is not called for each diagonal allocating 1862 new memory, thus calling lots of mallocs and slowing things down. 
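
      For illustration, a user-level sketch: callers pass global row numbers and may
      optionally fix the right-hand side at the same time.  Assuming A is an assembled
      MPIBAIJ matrix and x, b are conforming vectors:

        PetscInt rows[2] = {0,7};
        ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

      which zeros the two rows, places 1.0 on their diagonal entries and sets
      b[rows[i]] = 1.0*x[rows[i]].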
1863 1864 */ 1865 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1866 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1867 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1868 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1869 } else if (diag != 0.0) { 1870 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1871 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1872 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1873 for (i=0; i<slen; i++) { 1874 row = lrows[i] + rstart_bs; 1875 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1876 } 1877 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 } else { 1880 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1881 } 1882 1883 ierr = PetscFree(lrows);CHKERRQ(ierr); 1884 1885 /* wait on sends */ 1886 if (nsends) { 1887 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1888 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1889 ierr = PetscFree(send_status);CHKERRQ(ierr); 1890 } 1891 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1892 ierr = PetscFree(svalues);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatEqual_MPIBAIJ" 1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1913 { 1914 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1915 Mat a,b,c,d; 1916 PetscBool flg; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 a = matA->A; b = matA->B; 1921 c = matB->A; d = matB->B; 1922 1923 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1924 if (flg) { 1925 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1926 } 1927 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatCopy_MPIBAIJ" 1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1934 { 1935 PetscErrorCode ierr; 1936 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1937 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1938 1939 PetscFunctionBegin; 1940 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
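
      For illustration: the fast path below just copies the diagonal and off-diagonal
      blocks separately, which is only correct when both matrices share the nonzero
      pattern.  One way to obtain such a pair (a sketch; A and B are assumed names):

        ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
        ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);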
*/ 1941 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1942 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1945 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1952 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1964 { 1965 PetscErrorCode ierr; 1966 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1967 PetscBLASInt bnz,one=1; 1968 Mat_SeqBAIJ *x,*y; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 PetscScalar alpha = a; 1973 x = (Mat_SeqBAIJ*)xx->A->data; 1974 y = (Mat_SeqBAIJ*)yy->A->data; 1975 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1976 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1977 x = (Mat_SeqBAIJ*)xx->B->data; 1978 y = (Mat_SeqBAIJ*)yy->B->data; 1979 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 } else { 1982 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1989 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1990 { 1991 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1996 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2002 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2003 { 2004 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2009 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2015 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2016 { 2017 PetscErrorCode ierr; 2018 IS iscol_local; 2019 PetscInt csize; 2020 2021 PetscFunctionBegin; 2022 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2023 if (call == MAT_REUSE_MATRIX) { 2024 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2025 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2026 } else { 2027 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2028 } 2029 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2030 if (call == MAT_INITIAL_MATRIX) { 2031 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2032 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2039 /* 2040 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2041 in local and then by concatenating 
the local matrices the end result. 2042 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2043 */ 2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2045 { 2046 PetscErrorCode ierr; 2047 PetscMPIInt rank,size; 2048 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2049 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2050 Mat M,Mreuse; 2051 MatScalar *vwork,*aa; 2052 MPI_Comm comm = ((PetscObject)mat)->comm; 2053 IS isrow_new, iscol_new; 2054 PetscBool idflag,allrows, allcols; 2055 Mat_SeqBAIJ *aij; 2056 2057 PetscFunctionBegin; 2058 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2059 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2060 /* The compression and expansion should be avoided. Doesn't point 2061 out errors, might change the indices, hence buggey */ 2062 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2063 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2064 2065 /* Check for special case: each processor gets entire matrix columns */ 2066 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2067 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2068 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2069 else allcols = PETSC_FALSE; 2070 2071 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2072 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2073 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2074 else allrows = PETSC_FALSE; 2075 2076 if (call == MAT_REUSE_MATRIX) { 2077 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2078 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2079 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2080 } else { 2081 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2082 } 2083 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2084 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2085 /* 2086 m - number of local rows 2087 n - number of columns (same on all processors) 2088 rstart - first row in new global matrix generated 2089 */ 2090 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2091 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2092 m = m/bs; 2093 n = n/bs; 2094 2095 if (call == MAT_INITIAL_MATRIX) { 2096 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2097 ii = aij->i; 2098 jj = aij->j; 2099 2100 /* 2101 Determine the number of non-zeros in the diagonal and off-diagonal 2102 portions of the matrix in order to do correct preallocation 2103 */ 2104 2105 /* first get start and end of "diagonal" columns */ 2106 if (csize == PETSC_DECIDE) { 2107 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2108 if (mglobal == n*bs) { /* square matrix */ 2109 nlocal = m; 2110 } else { 2111 nlocal = n/size + ((n % size) > rank); 2112 } 2113 } else { 2114 nlocal = csize/bs; 2115 } 2116 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2117 rstart = rend - nlocal; 2118 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2119 2120 /* next, compute all the lengths */ 2121 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2122 olens = dlens + m; 2123 for 
(i=0; i<m; i++) { 2124 jend = ii[i+1] - ii[i]; 2125 olen = 0; 2126 dlen = 0; 2127 for (j=0; j<jend; j++) { 2128 if (*jj < rstart || *jj >= rend) olen++; 2129 else dlen++; 2130 jj++; 2131 } 2132 olens[i] = olen; 2133 dlens[i] = dlen; 2134 } 2135 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2136 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2137 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2138 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2139 ierr = PetscFree(dlens);CHKERRQ(ierr); 2140 } else { 2141 PetscInt ml,nl; 2142 2143 M = *newmat; 2144 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2145 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2146 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2147 /* 2148 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2149 rather than the slower MatSetValues(). 2150 */ 2151 M->was_assembled = PETSC_TRUE; 2152 M->assembled = PETSC_FALSE; 2153 } 2154 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2155 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2156 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2157 ii = aij->i; 2158 jj = aij->j; 2159 aa = aij->a; 2160 for (i=0; i<m; i++) { 2161 row = rstart/bs + i; 2162 nz = ii[i+1] - ii[i]; 2163 cwork = jj; jj += nz; 2164 vwork = aa; aa += nz*bs*bs; 2165 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2166 } 2167 2168 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2170 *newmat = M; 2171 2172 /* save submatrix used in processor for next request */ 2173 if (call == MAT_INITIAL_MATRIX) { 2174 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2175 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatPermute_MPIBAIJ" 2182 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2183 { 2184 MPI_Comm comm,pcomm; 2185 PetscInt first,rlocal_size,clocal_size,nrows; 2186 const PetscInt *rows; 2187 PetscMPIInt size; 2188 IS crowp,growp,irowp,lrowp,lcolp; 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2193 /* make a collective version of 'rowp' */ 2194 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2195 if (pcomm==comm) { 2196 crowp = rowp; 2197 } else { 2198 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2199 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2200 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2201 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2202 } 2203 /* collect the global row permutation and invert it */ 2204 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2205 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2206 if (pcomm!=comm) { 2207 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2208 } 2209 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2210 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2211 /* get the local target indices */ 2212 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2213 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2214 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2215 ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2216 
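  /* For illustration: at the user level the row and column permutations are index sets
     describing the new ordering.  A sketch, assuming A, rowp and colp already exist:

       Mat Aperm;
       ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
  */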
ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2217 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2218 /* the column permutation is so much easier; 2219 make a local version of 'colp' and invert it */ 2220 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2221 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2222 if (size==1) { 2223 lcolp = colp; 2224 } else { 2225 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2226 } 2227 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2228 /* now we just get the submatrix */ 2229 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2230 if (size>1) { 2231 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2232 } 2233 /* clean up */ 2234 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2240 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2241 { 2242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2243 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2244 2245 PetscFunctionBegin; 2246 if (nghosts) *nghosts = B->nbs; 2247 if (ghosts) *ghosts = baij->garray; 2248 PetscFunctionReturn(0); 2249 } 2250 2251 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2255 /* 2256 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2257 */ 2258 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2259 { 2260 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2261 PetscErrorCode ierr; 2262 PetscMPIInt size,*ncolsonproc,*disp,nn; 2263 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2264 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2265 PetscInt nis = iscoloring->n,nctot,*cols; 2266 PetscInt *rowhit,M,cstart,cend,colb; 2267 PetscInt *columnsforrow,l; 2268 IS *isa; 2269 PetscBool done,flg; 2270 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2271 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype; 2272 2273 PetscFunctionBegin; 2274 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2275 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2276 2277 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2278 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2279 2280 M = mat->rmap->n/bs; 2281 cstart = mat->cmap->rstart/bs; 2282 cend = mat->cmap->rend/bs; 2283 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2284 c->N = mat->cmap->N/bs; 2285 c->m = mat->rmap->n/bs; 2286 c->rstart = mat->rmap->rstart/bs; 2287 2288 c->ncolors = nis; 2289 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2290 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2291 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2292 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2293 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2294 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2295 2296 /* Allow access to data structures of local part of matrix */ 2297 if (!baij->colmap) { 2298 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2299 } 2300 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2301 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2302 2303 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2304 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2305 2306 for (i=0; i<nis; i++) { 2307 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2308 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2309 2310 c->ncolumns[i] = n; 2311 if (n) { 2312 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2313 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2314 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2315 } else { 2316 c->columns[i] = 0; 2317 } 2318 2319 if (ctype == IS_COLORING_GLOBAL) { 2320 /* Determine the total (parallel) number of columns of this color */ 2321 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2322 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2323 2324 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 2325 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2326 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 2327 if (!nctot) { 2328 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2329 } 2330 2331 disp[0] = 0; 2332 for (j=1; j<size; j++) { 2333 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2334 } 2335 2336 /* Get complete list of columns for color on each processor */ 2337 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2338 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2339 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2340 } else if (ctype == IS_COLORING_GHOSTED) { 2341 /* Determine local number of columns of this color on this process, including ghost points */ 2342 nctot = n; 2343 
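  /* For illustration: this routine is normally reached through the public finite-difference
     coloring interface.  A sketch, assuming J is an assembled MPIBAIJ Jacobian (the coloring
     type shown is just one possible choice):

       ISColoring    iscoloring;
       MatFDColoring fdcoloring;
       ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
       ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
       ierr = MatFDColoringSetFromOptions(fdcoloring);CHKERRQ(ierr);
       ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
  */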
ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2344 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2345 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2346 2347 /* 2348 Mark all rows affect by these columns 2349 */ 2350 /* Temporary option to allow for debugging/testing */ 2351 flg = PETSC_FALSE; 2352 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2353 if (!flg) { /*-----------------------------------------------------------------------------*/ 2354 /* crude, fast version */ 2355 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2356 /* loop over columns*/ 2357 for (j=0; j<nctot; j++) { 2358 if (ctype == IS_COLORING_GHOSTED) { 2359 col = ltog[cols[j]]; 2360 } else { 2361 col = cols[j]; 2362 } 2363 if (col >= cstart && col < cend) { 2364 /* column is in diagonal block of matrix */ 2365 rows = A_cj + A_ci[col-cstart]; 2366 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2367 } else { 2368 #if defined(PETSC_USE_CTABLE) 2369 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2370 colb--; 2371 #else 2372 colb = baij->colmap[col] - 1; 2373 #endif 2374 if (colb == -1) { 2375 m = 0; 2376 } else { 2377 colb = colb/bs; 2378 rows = B_cj + B_ci[colb]; 2379 m = B_ci[colb+1] - B_ci[colb]; 2380 } 2381 } 2382 /* loop over columns marking them in rowhit */ 2383 for (k=0; k<m; k++) { 2384 rowhit[*rows++] = col + 1; 2385 } 2386 } 2387 2388 /* count the number of hits */ 2389 nrows = 0; 2390 for (j=0; j<M; j++) { 2391 if (rowhit[j]) nrows++; 2392 } 2393 c->nrows[i] = nrows; 2394 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2395 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2396 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2397 nrows = 0; 2398 for (j=0; j<M; j++) { 2399 if (rowhit[j]) { 2400 c->rows[i][nrows] = j; 2401 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2402 nrows++; 2403 } 2404 } 2405 } else { /*-------------------------------------------------------------------------------*/ 2406 /* slow version, using rowhit as a linked list */ 2407 PetscInt currentcol,fm,mfm; 2408 rowhit[M] = M; 2409 nrows = 0; 2410 /* loop over columns*/ 2411 for (j=0; j<nctot; j++) { 2412 if (ctype == IS_COLORING_GHOSTED) { 2413 col = ltog[cols[j]]; 2414 } else { 2415 col = cols[j]; 2416 } 2417 if (col >= cstart && col < cend) { 2418 /* column is in diagonal block of matrix */ 2419 rows = A_cj + A_ci[col-cstart]; 2420 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2421 } else { 2422 #if defined(PETSC_USE_CTABLE) 2423 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2424 colb--; 2425 #else 2426 colb = baij->colmap[col] - 1; 2427 #endif 2428 if (colb == -1) { 2429 m = 0; 2430 } else { 2431 colb = colb/bs; 2432 rows = B_cj + B_ci[colb]; 2433 m = B_ci[colb+1] - B_ci[colb]; 2434 } 2435 } 2436 2437 /* loop over columns marking them in rowhit */ 2438 fm = M; /* fm points to first entry in linked list */ 2439 for (k=0; k<m; k++) { 2440 currentcol = *rows++; 2441 /* is it already in the list? 
*/
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on processor scalings followed by the off processor
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N, *gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    for (i=0; i<map->n; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l]; /* global column index */

        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr =
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2535 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2536 CHKMEMQ; 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2543 { 2544 Mat B; 2545 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2546 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2547 Mat_SeqAIJ *b; 2548 PetscErrorCode ierr; 2549 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2550 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2551 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2552 2553 PetscFunctionBegin; 2554 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2555 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2556 2557 /* ---------------------------------------------------------------- 2558 Tell every processor the number of nonzeros per row 2559 */ 2560 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2561 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2562 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2563 } 2564 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2565 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2566 displs = recvcounts + size; 2567 for (i=0; i<size; i++) { 2568 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2569 displs[i] = A->rmap->range[i]/bs; 2570 } 2571 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2572 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2573 #else 2574 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2575 #endif 2576 /* --------------------------------------------------------------- 2577 Create the sequential matrix of the same type as the local block diagonal 2578 */ 2579 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2580 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2581 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2582 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2583 b = (Mat_SeqAIJ*)B->data; 2584 2585 /*-------------------------------------------------------------------- 2586 Copy my part of matrix column indices over 2587 */ 2588 sendcount = ad->nz + bd->nz; 2589 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2590 a_jsendbuf = ad->j; 2591 b_jsendbuf = bd->j; 2592 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2593 cnt = 0; 2594 for (i=0; i<n; i++) { 2595 2596 /* put in lower diagonal portion */ 2597 m = bd->i[i+1] - bd->i[i]; 2598 while (m > 0) { 2599 /* is it above diagonal (in bd (compressed) numbering) */ 2600 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2601 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2602 m--; 2603 } 2604 2605 /* put in diagonal portion */ 2606 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2607 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2608 } 2609 2610 /* put in upper diagonal portion */ 2611 while (m-- > 0) { 2612 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2613 } 2614 } 2615 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual 
nz %D",sendcount,cnt); 2616 2617 /*-------------------------------------------------------------------- 2618 Gather all column indices to all processors 2619 */ 2620 for (i=0; i<size; i++) { 2621 recvcounts[i] = 0; 2622 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2623 recvcounts[i] += lens[j]; 2624 } 2625 } 2626 displs[0] = 0; 2627 for (i=1; i<size; i++) { 2628 displs[i] = displs[i-1] + recvcounts[i-1]; 2629 } 2630 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2631 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2632 #else 2633 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2634 #endif 2635 /*-------------------------------------------------------------------- 2636 Assemble the matrix into useable form (note numerical values not yet set) 2637 */ 2638 /* set the b->ilen (length of each row) values */ 2639 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2640 /* set the b->i indices */ 2641 b->i[0] = 0; 2642 for (i=1; i<=A->rmap->N/bs; i++) { 2643 b->i[i] = b->i[i-1] + lens[i-1]; 2644 } 2645 ierr = PetscFree(lens);CHKERRQ(ierr); 2646 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2649 2650 if (A->symmetric) { 2651 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2652 } else if (A->hermitian) { 2653 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2654 } else if (A->structurally_symmetric) { 2655 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2656 } 2657 *newmat = B; 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatSOR_MPIBAIJ" 2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2664 { 2665 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2666 PetscErrorCode ierr; 2667 Vec bb1 = 0; 2668 2669 PetscFunctionBegin; 2670 if (flag == SOR_APPLY_UPPER) { 2671 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2676 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2677 } 2678 2679 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2680 if (flag & SOR_ZERO_INITIAL_GUESS) { 2681 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2682 its--; 2683 } 2684 2685 while (its--) { 2686 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2688 2689 /* update rhs: bb1 = bb - B*x */ 2690 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2691 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2692 2693 /* local sweep */ 2694 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2695 } 2696 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2697 if (flag & SOR_ZERO_INITIAL_GUESS) { 2698 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2699 its--; 2700 } 2701 while (its--) { 2702 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* update rhs: bb1 = bb - B*x */ 2706 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2707 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2708 2709 /* local sweep */ 2710 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2711 } 2712 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2713 if (flag & SOR_ZERO_INITIAL_GUESS) { 2714 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2715 its--; 2716 } 2717 while (its--) { 2718 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 2721 /* update rhs: bb1 = bb - B*x */ 2722 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2723 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2724 2725 /* local sweep */ 2726 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2727 } 2728 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2729 2730 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2738 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2739 { 2740 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 2749 /* -------------------------------------------------------------------*/ 2750 static struct _MatOps MatOps_Values = { 2751 MatSetValues_MPIBAIJ, 2752 MatGetRow_MPIBAIJ, 2753 MatRestoreRow_MPIBAIJ, 2754 MatMult_MPIBAIJ, 2755 /* 4*/ MatMultAdd_MPIBAIJ, 2756 MatMultTranspose_MPIBAIJ, 2757 MatMultTransposeAdd_MPIBAIJ, 2758 0, 2759 0, 2760 0, 2761 /*10*/ 0, 2762 0, 2763 0, 2764 MatSOR_MPIBAIJ, 2765 MatTranspose_MPIBAIJ, 2766 /*15*/ MatGetInfo_MPIBAIJ, 2767 MatEqual_MPIBAIJ, 2768 MatGetDiagonal_MPIBAIJ, 2769 MatDiagonalScale_MPIBAIJ, 2770 MatNorm_MPIBAIJ, 2771 /*20*/ MatAssemblyBegin_MPIBAIJ, 2772 MatAssemblyEnd_MPIBAIJ, 2773 MatSetOption_MPIBAIJ, 2774 MatZeroEntries_MPIBAIJ, 2775 /*24*/ MatZeroRows_MPIBAIJ, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*29*/ MatSetUp_MPIBAIJ, 2781 0, 2782 0, 2783 0, 2784 0, 2785 /*34*/ MatDuplicate_MPIBAIJ, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /*39*/ MatAXPY_MPIBAIJ, 2791 MatGetSubMatrices_MPIBAIJ, 2792 MatIncreaseOverlap_MPIBAIJ, 2793 MatGetValues_MPIBAIJ, 2794 MatCopy_MPIBAIJ, 2795 /*44*/ 0, 2796 MatScale_MPIBAIJ, 2797 0, 2798 0, 2799 0, 2800 /*49*/ 0, 2801 0, 2802 0, 2803 0, 2804 0, 2805 /*54*/ MatFDColoringCreate_MPIBAIJ, 2806 0, 2807 MatSetUnfactored_MPIBAIJ, 2808 MatPermute_MPIBAIJ, 2809 MatSetValuesBlocked_MPIBAIJ, 2810 /*59*/ MatGetSubMatrix_MPIBAIJ, 2811 MatDestroy_MPIBAIJ, 2812 MatView_MPIBAIJ, 2813 0, 2814 0, 2815 /*64*/ 0, 2816 0, 2817 0, 2818 0, 2819 0, 2820 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2821 0, 2822 0, 2823 0, 2824 0, 2825 /*74*/ 0, 2826 MatFDColoringApply_BAIJ, 2827 0, 2828 0, 2829 0, 2830 /*79*/ 0, 2831 0, 2832 0, 2833 0, 2834 MatLoad_MPIBAIJ, 2835 /*84*/ 0, 2836 0, 2837 0, 2838 0, 2839 0, 2840 /*89*/ 0, 2841 0, 2842 0, 2843 0, 2844 0, 2845 /*94*/ 0, 2846 0, 2847 0, 2848 0, 2849 0, 2850 
/*99*/ 0, 2851 0, 2852 0, 2853 0, 2854 0, 2855 /*104*/0, 2856 MatRealPart_MPIBAIJ, 2857 MatImaginaryPart_MPIBAIJ, 2858 0, 2859 0, 2860 /*109*/0, 2861 0, 2862 0, 2863 0, 2864 0, 2865 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2866 0, 2867 MatGetGhosts_MPIBAIJ, 2868 0, 2869 0, 2870 /*119*/0, 2871 0, 2872 0, 2873 0, 2874 0, 2875 /*124*/0, 2876 0, 2877 MatInvertBlockDiagonal_MPIBAIJ 2878 }; 2879 2880 EXTERN_C_BEGIN 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2883 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2884 { 2885 PetscFunctionBegin; 2886 *a = ((Mat_MPIBAIJ*)A->data)->A; 2887 PetscFunctionReturn(0); 2888 } 2889 EXTERN_C_END 2890 2891 EXTERN_C_BEGIN 2892 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2893 EXTERN_C_END 2894 2895 EXTERN_C_BEGIN 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2899 { 2900 PetscInt m,rstart,cstart,cend; 2901 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2902 const PetscInt *JJ =0; 2903 PetscScalar *values=0; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2911 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2912 m = B->rmap->n/bs; 2913 rstart = B->rmap->rstart/bs; 2914 cstart = B->cmap->rstart/bs; 2915 cend = B->cmap->rend/bs; 2916 2917 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2918 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2919 for (i=0; i<m; i++) { 2920 nz = ii[i+1] - ii[i]; 2921 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2922 nz_max = PetscMax(nz_max,nz); 2923 JJ = jj + ii[i]; 2924 for (j=0; j<nz; j++) { 2925 if (*JJ >= cstart) break; 2926 JJ++; 2927 } 2928 d = 0; 2929 for (; j<nz; j++) { 2930 if (*JJ++ >= cend) break; 2931 d++; 2932 } 2933 d_nnz[i] = d; 2934 o_nnz[i] = nz - d; 2935 } 2936 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2937 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2942 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 for (i=0; i<m; i++) { 2945 PetscInt row = i + rstart; 2946 PetscInt ncols = ii[i+1] - ii[i]; 2947 const PetscInt *icols = jj + ii[i]; 2948 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2949 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 2952 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 EXTERN_C_END 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2962 /*@C 2963 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2964 (the default parallel PETSc format). 
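
   Example (a sketch; the block layout shown is assumed): the i[] and j[] arrays are given
   per local block row with global block column indices, and i[0] must be 0.  For a process
   whose two local block rows have blocks in global block columns {0,1} and {1}, with block
   size 2 and the numerical values to be set later:

       PetscInt i[] = {0,2,3};
       PetscInt j[] = {0,1,1};
       ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,PETSC_NULL);CHKERRQ(ierr);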
2965 2966 Collective on MPI_Comm 2967 2968 Input Parameters: 2969 + A - the matrix 2970 . bs - the block size 2971 . i - the indices into j for the start of each local row (starts with zero) 2972 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2973 - v - optional values in the matrix 2974 2975 Level: developer 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2980 @*/ 2981 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2982 { 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2987 PetscValidType(B,1); 2988 PetscValidLogicalCollectiveInt(B,bs,2); 2989 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2990 PetscFunctionReturn(0); 2991 } 2992 2993 EXTERN_C_BEGIN 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2996 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2997 { 2998 Mat_MPIBAIJ *b; 2999 PetscErrorCode ierr; 3000 PetscInt i; 3001 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3002 3003 PetscFunctionBegin; 3004 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3005 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3006 3007 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3008 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3009 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3010 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3011 3012 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3013 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3014 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3015 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3016 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3017 3018 if (d_nnz) { 3019 for (i=0; i<B->rmap->n/bs; i++) { 3020 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3021 } 3022 } 3023 if (o_nnz) { 3024 for (i=0; i<B->rmap->n/bs; i++) { 3025 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3026 } 3027 } 3028 3029 b = (Mat_MPIBAIJ*)B->data; 3030 b->bs2 = bs*bs; 3031 b->mbs = B->rmap->n/bs; 3032 b->nbs = B->cmap->n/bs; 3033 b->Mbs = B->rmap->N/bs; 3034 b->Nbs = B->cmap->N/bs; 3035 3036 for (i=0; i<=b->size; i++) { 3037 b->rangebs[i] = B->rmap->range[i]/bs; 3038 } 3039 b->rstartbs = B->rmap->rstart/bs; 3040 b->rendbs = B->rmap->rend/bs; 3041 b->cstartbs = B->cmap->rstart/bs; 3042 b->cendbs = B->cmap->rend/bs; 3043 3044 if (!B->preallocated) { 3045 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3046 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3047 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3048 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3049 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3050 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3051 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3052 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3053 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3054 } 3055 3056 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3057 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3058 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3059 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3060 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3061 B->preallocated = PETSC_TRUE; 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3068 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3069 EXTERN_C_END 3070 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3075 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3076 { 3077 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3078 PetscErrorCode ierr; 3079 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3080 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3081 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3082 3083 PetscFunctionBegin; 3084 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3085 ii[0] = 0; 3086 CHKMEMQ; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 CHKMEMQ; 3096 } 3097 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3098 cnt = 0; 3099 for (i=0; i<M; i++) { 3100 for (j=io[i]; j<io[i+1]; j++) { 3101 if (garray[jo[j]] > rstart) break; 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 for (k=id[i]; k<id[i+1]; k++) { 3106 if (jd[k] != i) { 3107 jj[cnt++] = rstart + jd[k]; 3108 CHKMEMQ; 3109 } 3110 } 3111 for (; j<io[i+1]; j++) { 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 3115 } 3116 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 EXTERN_C_END 3120 3121 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3122 EXTERN_C_BEGIN 3123 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3124 EXTERN_C_END 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3129 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3133 Mat B; 3134 Mat_MPIAIJ *b; 3135 3136 PetscFunctionBegin; 3137 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3138 3139 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3140 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3141 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3142 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3143 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3144 b = (Mat_MPIAIJ*) B->data; 3145 3146 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3147 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3148 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3150 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3151 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3155 if (reuse == MAT_REUSE_MATRIX) { 3156 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3157 } else { 3158 *newmat = B; 3159 } 3160 PetscFunctionReturn(0); 3161 } 3162 EXTERN_C_END 3163 3164 EXTERN_C_BEGIN 3165 #if defined(PETSC_HAVE_MUMPS) 3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3167 #endif 3168 EXTERN_C_END 3169 3170 /*MC 3171 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3172 3173 Options Database Keys: 3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3175 . -mat_block_size <bs> - set the blocksize used to store the matrix 3176 - -mat_use_hash_table <fact> 3177 3178 Level: beginner 3179 3180 .seealso: MatCreateMPIBAIJ 3181 M*/ 3182 3183 EXTERN_C_BEGIN 3184 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3185 EXTERN_C_END 3186 3187 EXTERN_C_BEGIN 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatCreate_MPIBAIJ" 3190 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3191 { 3192 Mat_MPIBAIJ *b; 3193 PetscErrorCode ierr; 3194 PetscBool flg; 3195 3196 PetscFunctionBegin; 3197 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3198 B->data = (void*)b; 3199 3200 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3201 B->assembled = PETSC_FALSE; 3202 3203 B->insertmode = NOT_SET_VALUES; 3204 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3205 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3206 3207 /* build local table of row and column ownerships */ 3208 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3209 3210 /* build cache for off array entries formed */ 3211 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3212 3213 b->donotstash = PETSC_FALSE; 3214 b->colmap = PETSC_NULL; 3215 b->garray = PETSC_NULL; 3216 b->roworiented = PETSC_TRUE; 3217 3218 /* stuff used in block assembly */ 3219 b->barray = 0; 3220 3221 /* stuff used for matrix vector multiply */ 3222 b->lvec = 0; 3223 b->Mvctx = 0; 3224 3225 /* stuff for MatGetRow() */ 3226 b->rowindices = 0; 3227 b->rowvalues = 0; 3228 b->getrowactive = PETSC_FALSE; 3229 3230 /* hash table stuff */ 3231 b->ht = 0; 3232 b->hd = 0; 3233 b->ht_size = 0; 3234 b->ht_flag = PETSC_FALSE; 3235 b->ht_fact = 0; 3236 b->ht_total_ct = 0; 3237 b->ht_insert_ct = 0; 3238 3239 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3240 b->ijonly = PETSC_FALSE; 3241 3242 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3243 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3244 if (flg) { 3245 PetscReal fact = 1.39; 3246 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3247 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3248 if (fact <= 1.0) fact = 1.39; 3249 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3250 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3251 } 3252 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3253 3254 #if defined(PETSC_HAVE_MUMPS) 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3256 #endif 3257 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3258 "MatConvert_MPIBAIJ_MPIAdj", 3259 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3261 "MatConvert_MPIBAIJ_MPIAIJ", 3262 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3264 "MatConvert_MPIBAIJ_MPISBAIJ", 3265 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3267 "MatStoreValues_MPIBAIJ", 3268 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3270 "MatRetrieveValues_MPIBAIJ", 3271 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3273 "MatGetDiagonalBlock_MPIBAIJ", 3274 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3276 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3277 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3278 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3279 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3280 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3281 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3282 "MatDiagonalScaleLocal_MPIBAIJ", 3283 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3284 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3285 "MatSetHashTableFactor_MPIBAIJ", 3286 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3287 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3288 "MatConvert_MPIBAIJ_MPIBSTRM", 3289 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3290 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3291 PetscFunctionReturn(0); 3292 } 3293 EXTERN_C_END 3294 3295 /*MC 3296 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3297 3298 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3299 and MATMPIBAIJ otherwise. 3300 3301 Options Database Keys: 3302 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  o o o d d d o o o o  o  o
   row 4  |  o o o d d d o o o o  o  o
   row 5  |  o o o d d d o o o o  o  o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
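
   As an illustrative sketch only (the counts below are made up for the three block rows
   pictured above, with bs=1, and assume every d and o location holds a nonzero), this
   preallocation could be supplied as

.vb
     PetscInt d_nnz[3] = {3,3,3};
     PetscInt o_nnz[3] = {9,9,9};
     MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve
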
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  o o o d d d o o o o  o  o
   row 4  |  o o o d d d o o o o  o  o
   row 5  |  o o o d d d o o o o  o  o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
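
   A minimal sketch of typical usage (sizes and nonzero counts below are illustrative only;
   the MatCreate(), MatSetType(), MatMPIBAIJSetPreallocation() sequence recommended above is
   an equivalent alternative):

.vb
     Mat      A;
     PetscInt bs = 2, d_nnz[2] = {2,1}, o_nnz[2] = {1,1};
     MatCreateBAIJ(PETSC_COMM_WORLD,bs,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
.ve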
3495 3496 Level: intermediate 3497 3498 .keywords: matrix, block, aij, compressed row, sparse, parallel 3499 3500 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3501 @*/ 3502 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3503 { 3504 PetscErrorCode ierr; 3505 PetscMPIInt size; 3506 3507 PetscFunctionBegin; 3508 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3509 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3511 if (size > 1) { 3512 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3513 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3514 } else { 3515 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3516 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3517 } 3518 PetscFunctionReturn(0); 3519 } 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3523 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3524 { 3525 Mat mat; 3526 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3527 PetscErrorCode ierr; 3528 PetscInt len=0; 3529 3530 PetscFunctionBegin; 3531 *newmat = 0; 3532 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3533 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3534 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3536 3537 mat->factortype = matin->factortype; 3538 mat->preallocated = PETSC_TRUE; 3539 mat->assembled = PETSC_TRUE; 3540 mat->insertmode = NOT_SET_VALUES; 3541 3542 a = (Mat_MPIBAIJ*)mat->data; 3543 mat->rmap->bs = matin->rmap->bs; 3544 a->bs2 = oldmat->bs2; 3545 a->mbs = oldmat->mbs; 3546 a->nbs = oldmat->nbs; 3547 a->Mbs = oldmat->Mbs; 3548 a->Nbs = oldmat->Nbs; 3549 3550 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3551 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3552 3553 a->size = oldmat->size; 3554 a->rank = oldmat->rank; 3555 a->donotstash = oldmat->donotstash; 3556 a->roworiented = oldmat->roworiented; 3557 a->rowindices = 0; 3558 a->rowvalues = 0; 3559 a->getrowactive = PETSC_FALSE; 3560 a->barray = 0; 3561 a->rstartbs = oldmat->rstartbs; 3562 a->rendbs = oldmat->rendbs; 3563 a->cstartbs = oldmat->cstartbs; 3564 a->cendbs = oldmat->cendbs; 3565 3566 /* hash table stuff */ 3567 a->ht = 0; 3568 a->hd = 0; 3569 a->ht_size = 0; 3570 a->ht_flag = oldmat->ht_flag; 3571 a->ht_fact = oldmat->ht_fact; 3572 a->ht_total_ct = 0; 3573 a->ht_insert_ct = 0; 3574 3575 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3576 if (oldmat->colmap) { 3577 #if defined(PETSC_USE_CTABLE) 3578 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3579 #else 3580 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3581 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3582 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3583 #endif 3584 } else a->colmap = 0; 3585 3586 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3587 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3588 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3589 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3590 } else a->garray = 0; 3591 3592 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3593 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3594 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3595 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3596 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3597 3598 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3599 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3600 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3602 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3603 *newmat = mat; 3604 PetscFunctionReturn(0); 3605 } 3606 3607 #undef __FUNCT__ 3608 #define __FUNCT__ "MatLoad_MPIBAIJ" 3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3610 { 3611 PetscErrorCode ierr; 3612 int fd; 3613 PetscInt i,nz,j,rstart,rend; 3614 PetscScalar *vals,*buf; 3615 MPI_Comm comm = ((PetscObject)viewer)->comm; 3616 MPI_Status status; 3617 PetscMPIInt rank,size,maxnz; 3618 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3619 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3620 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3621 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3622 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3623 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3624 3625 PetscFunctionBegin; 3626 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3627 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3628 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3629 3630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3631 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3632 if (!rank) { 3633 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3634 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3635 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3636 } 3637 3638 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3639 3640 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3641 M = header[1]; N = header[2]; 3642 3643 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3644 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3645 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3646 3647 /* If global sizes are set, check if they are consistent with that given in the file */ 3648 if (sizesset) { 3649 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3650 } 3651 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3652 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3653 3654 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3655 3656 /* 3657 This code adds extra rows to make sure the number of rows is 3658 divisible by the blocksize 3659 */ 3660 Mbs = M/bs; 3661 extra_rows = bs - M + bs*Mbs; 3662 if (extra_rows == bs) extra_rows = 0; 3663 else Mbs++; 3664 if (extra_rows && !rank) { 3665 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3666 } 3667 3668 /* determine ownership of all rows */ 3669 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3670 mbs = Mbs/size + ((Mbs % size) > rank); 3671 m = mbs*bs; 3672 } else { /* User set */ 3673 m = newmat->rmap->n; 3674 mbs = m/bs; 3675 } 3676 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3677 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3678 3679 /* process 0 needs enough room for process with most rows */ 3680 if (!rank) { 3681 mmax = rowners[1]; 3682 for (i=2; i<=size; i++) { 3683 mmax = PetscMax(mmax,rowners[i]); 3684 } 3685 mmax*=bs; 3686 } else mmax = m; 3687 3688 rowners[0] = 0; 3689 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3690 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3691 rstart = rowners[rank]; 3692 rend = rowners[rank+1]; 3693 3694 /* distribute row lengths to all processors */ 3695 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3696 if (!rank) { 3697 mend = m; 3698 if (size == 1) mend = mend - extra_rows; 3699 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3700 for (j=mend; j<m; j++) locrowlens[j] = 1; 3701 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3702 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3703 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3704 for (j=0; j<m; j++) { 3705 procsnz[0] += locrowlens[j]; 3706 } 3707 for (i=1; i<size; i++) { 3708 mend = browners[i+1] - browners[i]; 3709 if (i == size-1) mend = mend - extra_rows; 3710 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3711 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3712 /* calculate the number of nonzeros on each processor */ 3713 for (j=0; j<browners[i+1]-browners[i]; j++) { 3714 procsnz[i] += rowlengths[j]; 3715 } 3716 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3719 } else { 3720 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3721 } 3722 3723 if (!rank) { 3724 /* determine max buffer needed and allocate it */ 3725 maxnz = procsnz[0]; 3726 for (i=1; i<size; i++) { 3727 maxnz = PetscMax(maxnz,procsnz[i]); 3728 } 3729 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3730 3731 /* read in my part of the matrix column indices */ 3732 nz = procsnz[0]; 3733 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3734 mycols = ibuf; 3735 if (size == 1) nz -= extra_rows; 3736 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3737 if (size == 1) { 3738 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3739 } 3740 3741 /* read in every ones (except the last) and ship off */ 3742 for (i=1; i<size-1; i++) { 3743 nz = procsnz[i]; 3744 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3745 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3746 } 3747 /* read in the stuff for the last proc */ 3748 if (size != 1) { 3749 nz = procsnz[size-1] - extra_rows; /* the 
extra rows are not on the disk */ 3750 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3751 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3752 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3753 } 3754 ierr = PetscFree(cols);CHKERRQ(ierr); 3755 } else { 3756 /* determine buffer space needed for message */ 3757 nz = 0; 3758 for (i=0; i<m; i++) { 3759 nz += locrowlens[i]; 3760 } 3761 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3762 mycols = ibuf; 3763 /* receive message of column indices*/ 3764 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3765 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3766 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3767 } 3768 3769 /* loop over local rows, determining number of off diagonal entries */ 3770 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3771 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3772 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3773 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3774 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3775 rowcount = 0; nzcount = 0; 3776 for (i=0; i<mbs; i++) { 3777 dcount = 0; 3778 odcount = 0; 3779 for (j=0; j<bs; j++) { 3780 kmax = locrowlens[rowcount]; 3781 for (k=0; k<kmax; k++) { 3782 tmp = mycols[nzcount++]/bs; 3783 if (!mask[tmp]) { 3784 mask[tmp] = 1; 3785 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3786 else masked1[dcount++] = tmp; 3787 } 3788 } 3789 rowcount++; 3790 } 3791 3792 dlens[i] = dcount; 3793 odlens[i] = odcount; 3794 3795 /* zero out the mask elements we set */ 3796 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3797 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3798 } 3799 3800 3801 if (!sizesset) { 3802 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3803 } 3804 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3805 3806 if (!rank) { 3807 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3808 /* read in my part of the matrix numerical values */ 3809 nz = procsnz[0]; 3810 vals = buf; 3811 mycols = ibuf; 3812 if (size == 1) nz -= extra_rows; 3813 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3814 if (size == 1) { 3815 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3816 } 3817 3818 /* insert into matrix */ 3819 jj = rstart*bs; 3820 for (i=0; i<m; i++) { 3821 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3822 mycols += locrowlens[i]; 3823 vals += locrowlens[i]; 3824 jj++; 3825 } 3826 /* read in other processors (except the last one) and ship out */ 3827 for (i=1; i<size-1; i++) { 3828 nz = procsnz[i]; 3829 vals = buf; 3830 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3831 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3832 } 3833 /* the last proc */ 3834 if (size != 1) { 3835 nz = procsnz[i] - extra_rows; 3836 vals = buf; 3837 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3838 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3839 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3840 } 3841 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3842 } else { 3843 /* receive numeric values */ 3844 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Not Collective, each process can use a different factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat    - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
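
  For reference, a single call through this routine corresponds to one call of the standard C
  interface (argument names here follow the parameter list above, with the Fortran pointers
  dereferenced):

.vb
     MatSetValuesBlocked(mat,m,im,n,in,v,addv);
.ve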
3953 3954 Level: advanced 3955 3956 .seealso: MatSetValuesBlocked() 3957 @*/ 3958 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3959 { 3960 /* convert input arguments to C version */ 3961 Mat mat = *matin; 3962 PetscInt m = *min, n = *nin; 3963 InsertMode addv = *addvin; 3964 3965 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3966 const MatScalar *value; 3967 MatScalar *barray = baij->barray; 3968 PetscBool roworiented = baij->roworiented; 3969 PetscErrorCode ierr; 3970 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3971 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3972 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3973 3974 PetscFunctionBegin; 3975 /* tasks normally handled by MatSetValuesBlocked() */ 3976 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3977 #if defined(PETSC_USE_DEBUG) 3978 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3979 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3980 #endif 3981 if (mat->assembled) { 3982 mat->was_assembled = PETSC_TRUE; 3983 mat->assembled = PETSC_FALSE; 3984 } 3985 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3986 3987 3988 if (!barray) { 3989 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3990 baij->barray = barray; 3991 } 3992 3993 if (roworiented) stepval = (n-1)*bs; 3994 else stepval = (m-1)*bs; 3995 3996 for (i=0; i<m; i++) { 3997 if (im[i] < 0) continue; 3998 #if defined(PETSC_USE_DEBUG) 3999 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4000 #endif 4001 if (im[i] >= rstart && im[i] < rend) { 4002 row = im[i] - rstart; 4003 for (j=0; j<n; j++) { 4004 /* If NumCol = 1 then a copy is not required */ 4005 if ((roworiented) && (n == 1)) { 4006 barray = (MatScalar*)v + i*bs2; 4007 } else if ((!roworiented) && (m == 1)) { 4008 barray = (MatScalar*)v + j*bs2; 4009 } else { /* Here a copy is required */ 4010 if (roworiented) { 4011 value = v + i*(stepval+bs)*bs + j*bs; 4012 } else { 4013 value = v + j*(stepval+bs)*bs + i*bs; 4014 } 4015 for (ii=0; ii<bs; ii++,value+=stepval) { 4016 for (jj=0; jj<bs; jj++) { 4017 *barray++ = *value++; 4018 } 4019 } 4020 barray -=bs2; 4021 } 4022 4023 if (in[j] >= cstart && in[j] < cend) { 4024 col = in[j] - cstart; 4025 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4026 } else if (in[j] < 0) continue; 4027 #if defined(PETSC_USE_DEBUG) 4028 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4029 #endif 4030 else { 4031 if (mat->was_assembled) { 4032 if (!baij->colmap) { 4033 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4034 } 4035 4036 #if defined(PETSC_USE_DEBUG) 4037 #if defined(PETSC_USE_CTABLE) 4038 { PetscInt data; 4039 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4040 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4041 } 4042 #else 4043 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4044 #endif 4045 #endif 4046 #if defined(PETSC_USE_CTABLE) 4047 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4048 col = (col - 1)/bs; 4049 #else 4050 col = (baij->colmap[in[j]] 
- 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
         rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
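
/*
   Example (an illustrative sketch only, not part of the library): one way MatCreateMPIBAIJWithArrays()
   might be called on a single process with bs = 1. The 2x2 CSR data below is made up for
   illustration; since the arrays are copied by the routine, they may be freed or modified afterwards.

.vb
     PetscInt    i[3] = {0,2,4};
     PetscInt    j[4] = {0,1,0,1};
     PetscScalar a[4] = {4.0,-1.0,-1.0,4.0};
     Mat         C;

     MatCreateMPIBAIJWithArrays(PETSC_COMM_SELF,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&C);
     MatDestroy(&C);
.ve
*/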