#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
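/*
   Illustrative sketch (editorial addition, not part of the original source): with block
   size bs = 2 and garray = {3,7} (the off-process global block columns), the routine
   below produces

      colmap[3] = 0*bs + 1 = 1     -> local columns 0..1 of B
      colmap[7] = 1*bs + 1 = 3     -> local columns 2..3 of B

   and colmap[g] == 0 for every other global block column g, which the callers use as
   "not present".  With PETSC_USE_CTABLE the same (key,value) pairs are stored in a
   PetscTable instead of a dense array of length Nbs.
*/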
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
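/*
   Editorial note: the two macros below are hand-inlined variants of MatSetValues_SeqBAIJ()
   for the diagonal (A) and off-diagonal (B) blocks.  Each one locates the block row,
   narrows the search for the block column with a short bisection followed by a linear
   scan, and then either updates the existing bs x bs block in place or, when insertion
   is allowed (nonew == 0), shifts the trailing blocks of the row up by one to make room
   for the new entry.
*/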
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
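/*
   Illustrative note (editorial addition): HASH() above is multiplicative hashing -- the key
   is scaled by the fractional golden ratio and the fractional part of the product is mapped
   onto the table.  For example, with size = 1000 and key = 41,
       41*0.6180339887 = 25.339...   so   HASH(1000,41,tmp) = (PetscInt)(1000*0.339...) = 339.
   The key used below is the global block entry number (brow*Nbs + bcol) + 1, so a stored
   value of 0 can serve as the "empty slot" marker in the table.
*/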
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
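/*
   Editorial note: MatGetValues_MPIBAIJ() below only serves rows owned by the calling
   process (off-process rows raise an error).  For columns that fall in the off-diagonal
   part it uses colmap/garray to check whether the block actually exists in B; if it does
   not, the requested entry is simply returned as 0.0.
*/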
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
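  /*
     Editorial note: collisions in the two loops above are resolved by linear probing --
     the scan starts at h1 and wraps around the table until a free slot is found.  HT[]
     stores the key and HD[] stores the address of the corresponding bs2-sized block
     inside a->a or b->a, so MatSetValues_MPIBAIJ_HT() can later write straight into the
     matrix storage.
  */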
  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
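/*
   Editorial note: the binary viewer below writes the standard PETSc binary matrix layout --
   a four-entry header (MAT_FILE_CLASSID, global rows, global columns, global nonzeros),
   followed by the row lengths, the column indices and finally the numerical values, with
   each processor's contribution gathered to process 0 one at a time under flow control.
*/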
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
info->memory; isend[4] += info->mallocs; 1555 if (flag == MAT_LOCAL) { 1556 info->nz_used = isend[0]; 1557 info->nz_allocated = isend[1]; 1558 info->nz_unneeded = isend[2]; 1559 info->memory = isend[3]; 1560 info->mallocs = isend[4]; 1561 } else if (flag == MAT_GLOBAL_MAX) { 1562 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1563 info->nz_used = irecv[0]; 1564 info->nz_allocated = irecv[1]; 1565 info->nz_unneeded = irecv[2]; 1566 info->memory = irecv[3]; 1567 info->mallocs = irecv[4]; 1568 } else if (flag == MAT_GLOBAL_SUM) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1570 info->nz_used = irecv[0]; 1571 info->nz_allocated = irecv[1]; 1572 info->nz_unneeded = irecv[2]; 1573 info->memory = irecv[3]; 1574 info->mallocs = irecv[4]; 1575 } else { 1576 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1577 } 1578 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1579 info->fill_ratio_needed = 0; 1580 info->factor_mallocs = 0; 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1586 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1587 { 1588 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 switch (op) { 1593 case MAT_NEW_NONZERO_LOCATIONS: 1594 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1595 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1596 case MAT_KEEP_NONZERO_PATTERN: 1597 case MAT_NEW_NONZERO_LOCATION_ERR: 1598 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1599 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1600 break; 1601 case MAT_ROW_ORIENTED: 1602 a->roworiented = flg; 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_NEW_DIAGONALS: 1607 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1608 break; 1609 case MAT_IGNORE_OFF_PROC_ENTRIES: 1610 a->donotstash = flg; 1611 break; 1612 case MAT_USE_HASH_TABLE: 1613 a->ht_flag = flg; 1614 break; 1615 case MAT_SYMMETRIC: 1616 case MAT_STRUCTURALLY_SYMMETRIC: 1617 case MAT_HERMITIAN: 1618 case MAT_SYMMETRY_ETERNAL: 1619 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1620 break; 1621 default: 1622 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1632 Mat_SeqBAIJ *Aloc; 1633 Mat B; 1634 PetscErrorCode ierr; 1635 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1636 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1637 MatScalar *a; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1642 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1643 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1644 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1645 /* Do not know preallocation information, but must set block size */ 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 
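/*
   Illustrative usage sketch (added commentary, not part of the original source): callers reach
   this routine through MatTranspose().  MAT_INITIAL_MATRIX creates a brand new transposed
   matrix, while handing the matrix itself back with MAT_REUSE_MATRIX selects the in-place
   branch that finishes with MatHeaderMerge() at the end of this function (square matrices only):

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/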
/* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 col++; a += bs; 1664 } 1665 } 1666 } 1667 /* copy over the B part */ 1668 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1669 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1670 for (i=0; i<mbs; i++) { 1671 rvals[0] = bs*(baij->rstartbs + i); 1672 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1673 for (j=ai[i]; j<ai[i+1]; j++) { 1674 col = baij->garray[aj[j]]*bs; 1675 for (k=0; k<bs; k++) { 1676 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1677 col++; a += bs; 1678 } 1679 } 1680 } 1681 ierr = PetscFree(rvals);CHKERRQ(ierr); 1682 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1683 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 1685 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1686 *matout = B; 1687 } else { 1688 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1689 } 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1695 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1696 { 1697 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1698 Mat a = baij->A,b = baij->B; 1699 PetscErrorCode ierr; 1700 PetscInt s1,s2,s3; 1701 1702 PetscFunctionBegin; 1703 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1704 if (rr) { 1705 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1706 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1707 /* Overlap communication with computation. 
*/ 1708 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1709 } 1710 if (ll) { 1711 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1712 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1713 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1714 } 1715 /* scale the diagonal block */ 1716 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1717 1718 if (rr) { 1719 /* Do a scatter end and then right scale the off-diagonal block */ 1720 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1721 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1722 } 1723 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1732 PetscErrorCode ierr; 1733 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1734 PetscInt i,*owners = A->rmap->range; 1735 PetscInt *nprocs,j,idx,nsends,row; 1736 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1737 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1738 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1739 MPI_Comm comm = ((PetscObject)A)->comm; 1740 MPI_Request *send_waits,*recv_waits; 1741 MPI_Status recv_status,*send_status; 1742 const PetscScalar *xx; 1743 PetscScalar *bb; 1744 #if defined(PETSC_DEBUG) 1745 PetscBool found = PETSC_FALSE; 1746 #endif 1747 1748 PetscFunctionBegin; 1749 /* first count number of contributors to each processor */ 1750 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1751 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1753 j = 0; 1754 for (i=0; i<N; i++) { 1755 if (lastidx > (idx = rows[i])) j = 0; 1756 lastidx = idx; 1757 for (; j<size; j++) { 1758 if (idx >= owners[j] && idx < owners[j+1]) { 1759 nprocs[2*j]++; 1760 nprocs[2*j+1] = 1; 1761 owner[i] = j; 1762 #if defined(PETSC_DEBUG) 1763 found = PETSC_TRUE; 1764 #endif 1765 break; 1766 } 1767 } 1768 #if defined(PETSC_DEBUG) 1769 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1770 found = PETSC_FALSE; 1771 #endif 1772 } 1773 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1774 1775 if (A->nooffproczerorows) { 1776 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1777 nrecvs = nsends; 1778 nmax = N; 1779 } else { 1780 /* inform other processors of number of messages and max length*/ 1781 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1782 } 1783 1784 /* post receives: */ 1785 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1786 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1787 for (i=0; i<nrecvs; i++) { 1788 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1789 } 1790 1791 /* do sends: 1792 1) starts[i] gives the starting index in svalues for stuff going to 1793 the ith processor 1794 */ 1795 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1796 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1797 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1798 starts[0] = 0; 1799 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1800 for (i=0; i<N; i++) { 1801 svalues[starts[owner[i]]++] = rows[i]; 1802 } 1803 1804 starts[0] = 0; 1805 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1806 count = 0; 1807 for (i=0; i<size; i++) { 1808 if (nprocs[2*i+1]) { 1809 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1810 } 1811 } 1812 ierr = PetscFree(starts);CHKERRQ(ierr); 1813 1814 base = owners[rank]; 1815 1816 /* wait on receives */ 1817 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1818 count = nrecvs; 1819 slen = 0; 1820 while (count) { 1821 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1822 /* unpack receives into our local space */ 1823 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1824 source[imdex] = recv_status.MPI_SOURCE; 1825 lens[imdex] = n; 1826 slen += n; 1827 count--; 1828 } 1829 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1830 1831 /* move the data into the send scatter */ 1832 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1833 count = 0; 1834 for (i=0; i<nrecvs; i++) { 1835 values = rvalues + i*nmax; 1836 for (j=0; j<lens[i]; j++) { 1837 lrows[count++] = values[j] - base; 1838 } 1839 } 1840 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1841 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1842 ierr = PetscFree(owner);CHKERRQ(ierr); 1843 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1844 1845 /* fix right hand side if needed */ 1846 if (x && b) { 1847 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1848 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1849 for (i=0; i<slen; i++) { 1850 bb[lrows[i]] = diag*xx[lrows[i]]; 1851 } 1852 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1853 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1854 } 1855 1856 /* actually zap the local rows */ 1857 /* 1858 Zero the required rows. If the "diagonal block" of the matrix 1859 is square and the user wishes to set the diagonal we use separate 1860 code so that MatSetValues() is not called for each diagonal allocating 1861 new memory, thus calling lots of mallocs and slowing things down. 
1862 1863 */ 1864 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1865 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1866 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1867 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1868 } else if (diag != 0.0) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1872 for (i=0; i<slen; i++) { 1873 row = lrows[i] + rstart_bs; 1874 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1875 } 1876 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 } else { 1879 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1880 } 1881 1882 ierr = PetscFree(lrows);CHKERRQ(ierr); 1883 1884 /* wait on sends */ 1885 if (nsends) { 1886 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1887 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1888 ierr = PetscFree(send_status);CHKERRQ(ierr); 1889 } 1890 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1891 ierr = PetscFree(svalues);CHKERRQ(ierr); 1892 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1898 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatEqual_MPIBAIJ" 1912 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1913 { 1914 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1915 Mat a,b,c,d; 1916 PetscBool flg; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 a = matA->A; b = matA->B; 1921 c = matB->A; d = matB->B; 1922 1923 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1924 if (flg) { 1925 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1926 } 1927 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatCopy_MPIBAIJ" 1933 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1934 { 1935 PetscErrorCode ierr; 1936 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1937 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1938 1939 PetscFunctionBegin; 1940 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1941 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1942 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1945 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1952 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1963 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1964 { 1965 PetscErrorCode ierr; 1966 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1967 PetscBLASInt bnz,one=1; 1968 Mat_SeqBAIJ *x,*y; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 PetscScalar alpha = a; 1973 x = (Mat_SeqBAIJ *)xx->A->data; 1974 y = (Mat_SeqBAIJ *)yy->A->data; 1975 bnz = PetscBLASIntCast(x->nz); 1976 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1977 x = (Mat_SeqBAIJ *)xx->B->data; 1978 y = (Mat_SeqBAIJ *)yy->B->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 } else { 1982 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1989 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1990 { 1991 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1996 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2002 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2003 { 2004 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2009 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2015 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2016 { 2017 PetscErrorCode ierr; 2018 IS iscol_local; 2019 PetscInt csize; 2020 2021 PetscFunctionBegin; 2022 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2023 if (call == MAT_REUSE_MATRIX) { 2024 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2025 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2026 } else { 2027 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2028 } 2029 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2030 if (call == MAT_INITIAL_MATRIX) { 2031 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2032 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2039 /* 2040 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2041 in local and then by concatenating the local matrices the end result. 
2042 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2043 */ 2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2045 { 2046 PetscErrorCode ierr; 2047 PetscMPIInt rank,size; 2048 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2049 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2050 Mat M,Mreuse; 2051 MatScalar *vwork,*aa; 2052 MPI_Comm comm = ((PetscObject)mat)->comm; 2053 IS isrow_new, iscol_new; 2054 PetscBool idflag,allrows, allcols; 2055 Mat_SeqBAIJ *aij; 2056 2057 2058 PetscFunctionBegin; 2059 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2061 /* The compression and expansion should be avoided. Doesn't point 2062 out errors, might change the indices, hence buggey */ 2063 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2064 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2065 2066 /* Check for special case: each processor gets entire matrix columns */ 2067 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2068 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2069 if (idflag && ncol == mat->cmap->N){ 2070 allcols = PETSC_TRUE; 2071 } else { 2072 allcols = PETSC_FALSE; 2073 } 2074 2075 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2076 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2077 if (idflag && nrow == mat->rmap->N){ 2078 allrows = PETSC_TRUE; 2079 } else { 2080 allrows = PETSC_FALSE; 2081 } 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2086 } else { 2087 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2088 } 2089 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2090 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2091 /* 2092 m - number of local rows 2093 n - number of columns (same on all processors) 2094 rstart - first row in new global matrix generated 2095 */ 2096 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2097 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2098 m = m/bs; 2099 n = n/bs; 2100 2101 if (call == MAT_INITIAL_MATRIX) { 2102 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2103 ii = aij->i; 2104 jj = aij->j; 2105 2106 /* 2107 Determine the number of non-zeros in the diagonal and off-diagonal 2108 portions of the matrix in order to do correct preallocation 2109 */ 2110 2111 /* first get start and end of "diagonal" columns */ 2112 if (csize == PETSC_DECIDE) { 2113 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2114 if (mglobal == n*bs) { /* square matrix */ 2115 nlocal = m; 2116 } else { 2117 nlocal = n/size + ((n % size) > rank); 2118 } 2119 } else { 2120 nlocal = csize/bs; 2121 } 2122 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2123 rstart = rend - nlocal; 2124 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2125 2126 /* next, compute all the lengths */ 2127 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2128 olens = dlens + m; 
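/*
   Sketch of the counting loop that follows (the numbers are illustrative, not from the source):
   every block-column index *jj of local block row i is tested against the owned block-column
   range [rstart,rend); indices inside the range are tallied in dlens[i] (diagonal block of the
   new matrix) and the rest in olens[i] (off-diagonal block).  These arrays are exactly the
   d_nnz/o_nnz information handed to MatMPIBAIJSetPreallocation() below.

       Example: rstart = 4, rend = 8, row i holds block columns {1,5,7,9}
                -> dlens[i] = 2 (columns 5 and 7),  olens[i] = 2 (columns 1 and 9)
*/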
2129 for (i=0; i<m; i++) { 2130 jend = ii[i+1] - ii[i]; 2131 olen = 0; 2132 dlen = 0; 2133 for (j=0; j<jend; j++) { 2134 if (*jj < rstart || *jj >= rend) olen++; 2135 else dlen++; 2136 jj++; 2137 } 2138 olens[i] = olen; 2139 dlens[i] = dlen; 2140 } 2141 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2142 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2143 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2145 ierr = PetscFree(dlens);CHKERRQ(ierr); 2146 } else { 2147 PetscInt ml,nl; 2148 2149 M = *newmat; 2150 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2151 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2152 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2153 /* 2154 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2155 rather than the slower MatSetValues(). 2156 */ 2157 M->was_assembled = PETSC_TRUE; 2158 M->assembled = PETSC_FALSE; 2159 } 2160 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2161 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2162 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 aa = aij->a; 2166 for (i=0; i<m; i++) { 2167 row = rstart/bs + i; 2168 nz = ii[i+1] - ii[i]; 2169 cwork = jj; jj += nz; 2170 vwork = aa; aa += nz*bs*bs; 2171 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2172 } 2173 2174 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2175 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 *newmat = M; 2177 2178 /* save submatrix used in processor for next request */ 2179 if (call == MAT_INITIAL_MATRIX) { 2180 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2181 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2182 } 2183 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatPermute_MPIBAIJ" 2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2190 { 2191 MPI_Comm comm,pcomm; 2192 PetscInt first,rlocal_size,clocal_size,nrows; 2193 const PetscInt *rows; 2194 PetscMPIInt size; 2195 IS crowp,growp,irowp,lrowp,lcolp; 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2200 /* make a collective version of 'rowp' */ 2201 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2202 if (pcomm==comm) { 2203 crowp = rowp; 2204 } else { 2205 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2206 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2207 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2208 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2209 } 2210 /* collect the global row permutation and invert it */ 2211 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2212 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2213 if (pcomm!=comm) { 2214 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2215 } 2216 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2217 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2218 /* get the local target indices */ 2219 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2220 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2221 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2222 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2223 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2225 /* the column permutation is so much easier; 2226 make a local version of 'colp' and invert it */ 2227 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2228 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2229 if (size==1) { 2230 lcolp = colp; 2231 } else { 2232 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2233 } 2234 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2235 /* now we just get the submatrix */ 2236 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2237 if (size>1) { 2238 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2239 } 2240 /* clean up */ 2241 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2247 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2248 { 2249 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2250 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2251 2252 PetscFunctionBegin; 2253 if (nghosts) { *nghosts = B->nbs;} 2254 if (ghosts) {*ghosts = baij->garray;} 2255 PetscFunctionReturn(0); 2256 } 2257 2258 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2262 /* 2263 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2264 */ 2265 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2266 { 2267 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2268 PetscErrorCode ierr; 2269 PetscMPIInt size,*ncolsonproc,*disp,nn; 2270 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2271 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2272 PetscInt nis = iscoloring->n,nctot,*cols; 2273 PetscInt *rowhit,M,cstart,cend,colb; 2274 PetscInt *columnsforrow,l; 2275 IS *isa; 2276 PetscBool done,flg; 2277 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2278 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2279 2280 PetscFunctionBegin; 2281 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2282 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2283 2284 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2285 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2286 M = mat->rmap->n/bs; 2287 cstart = mat->cmap->rstart/bs; 2288 cend = mat->cmap->rend/bs; 2289 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2290 c->N = mat->cmap->N/bs; 2291 c->m = mat->rmap->n/bs; 2292 c->rstart = mat->rmap->rstart/bs; 2293 2294 c->ncolors = nis; 2295 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2296 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2297 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2298 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2299 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2300 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2301 2302 /* Allow access to data structures of local part of matrix */ 2303 if (!baij->colmap) { 2304 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2305 } 2306 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2307 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2308 2309 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2310 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2311 2312 for (i=0; i<nis; i++) { 2313 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2314 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2315 c->ncolumns[i] = n; 2316 if (n) { 2317 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2318 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2319 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2320 } else { 2321 c->columns[i] = 0; 2322 } 2323 2324 if (ctype == IS_COLORING_GLOBAL){ 2325 /* Determine the total (parallel) number of columns of this color */ 2326 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2327 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2328 2329 nn = PetscMPIIntCast(n); 2330 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2331 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2332 if (!nctot) { 2333 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2334 } 2335 2336 disp[0] = 0; 2337 for (j=1; j<size; j++) { 2338 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2339 } 2340 2341 /* Get complete list of columns for color on each processor */ 2342 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2343 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2344 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2345 } else if (ctype == IS_COLORING_GHOSTED){ 2346 /* Determine local number of columns of this color on this process, including ghost points */ 2347 nctot = n; 2348 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2349 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2350 } else { 2351 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2352 } 2353 2354 /* 2355 Mark all rows affect by these columns 2356 */ 2357 /* Temporary option to allow for debugging/testing */ 2358 flg = PETSC_FALSE; 2359 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2360 if (!flg) {/*-----------------------------------------------------------------------------*/ 2361 /* crude, fast version */ 2362 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2363 /* loop over columns*/ 2364 for (j=0; j<nctot; j++) { 2365 if (ctype == IS_COLORING_GHOSTED) { 2366 col = ltog[cols[j]]; 2367 } else { 2368 col = cols[j]; 2369 } 2370 if (col >= cstart && col < cend) { 2371 /* column is in diagonal block of matrix */ 2372 rows = A_cj + A_ci[col-cstart]; 2373 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2374 } else { 2375 #if defined (PETSC_USE_CTABLE) 2376 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2377 colb --; 2378 #else 2379 colb = baij->colmap[col] - 1; 2380 #endif 2381 if (colb == -1) { 2382 m = 0; 2383 } else { 2384 colb = colb/bs; 2385 rows = B_cj + B_ci[colb]; 2386 m = B_ci[colb+1] - B_ci[colb]; 2387 } 2388 } 2389 /* loop over columns marking them in rowhit */ 2390 for (k=0; k<m; k++) { 2391 rowhit[*rows++] = col + 1; 2392 } 2393 } 2394 2395 /* count the number of hits */ 2396 nrows = 0; 2397 for (j=0; j<M; j++) { 2398 if (rowhit[j]) nrows++; 2399 } 2400 c->nrows[i] = nrows; 2401 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2402 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2403 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2404 nrows = 0; 2405 for (j=0; j<M; j++) { 2406 if (rowhit[j]) { 2407 c->rows[i][nrows] = j; 2408 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2409 nrows++; 2410 } 2411 } 2412 } else {/*-------------------------------------------------------------------------------*/ 2413 /* slow version, using rowhit as a linked list */ 2414 PetscInt currentcol,fm,mfm; 2415 rowhit[M] = M; 2416 nrows = 0; 2417 /* loop over columns*/ 2418 for (j=0; j<nctot; j++) { 2419 if (ctype == IS_COLORING_GHOSTED) { 2420 col = ltog[cols[j]]; 2421 } else { 2422 col = cols[j]; 2423 } 2424 if (col >= cstart && col < cend) { 2425 /* column is in diagonal block of matrix */ 2426 rows = A_cj + A_ci[col-cstart]; 2427 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2428 } else { 2429 #if defined (PETSC_USE_CTABLE) 2430 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2431 colb --; 2432 #else 2433 colb = baij->colmap[col] - 1; 2434 #endif 2435 if (colb == -1) { 2436 m = 0; 2437 } else { 2438 colb = colb/bs; 2439 rows = B_cj + B_ci[colb]; 2440 m = B_ci[colb+1] - B_ci[colb]; 2441 } 2442 } 2443 2444 /* loop over columns marking them in rowhit */ 2445 fm = M; /* fm points to first entry in linked list */ 2446 for (k=0; k<m; k++) { 2447 currentcol = *rows++; 2448 /* is it already in the list? 
*/ 2449 do { 2450 mfm = fm; 2451 fm = rowhit[fm]; 2452 } while (fm < currentcol); 2453 /* not in list so add it */ 2454 if (fm != currentcol) { 2455 nrows++; 2456 columnsforrow[currentcol] = col; 2457 /* next three lines insert new entry into linked list */ 2458 rowhit[mfm] = currentcol; 2459 rowhit[currentcol] = fm; 2460 fm = currentcol; 2461 /* fm points to present position in list since we know the columns are sorted */ 2462 } else { 2463 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2464 } 2465 } 2466 } 2467 c->nrows[i] = nrows; 2468 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2469 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2470 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2471 /* now store the linked list of rows into c->rows[i] */ 2472 nrows = 0; 2473 fm = rowhit[M]; 2474 do { 2475 c->rows[i][nrows] = fm; 2476 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2477 fm = rowhit[fm]; 2478 } while (fm < M); 2479 } /* ---------------------------------------------------------------------------------------*/ 2480 ierr = PetscFree(cols);CHKERRQ(ierr); 2481 } 2482 2483 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2484 /* 2485 vscale will contain the "diagonal" on processor scalings followed by the off processor 2486 */ 2487 if (ctype == IS_COLORING_GLOBAL) { 2488 PetscInt *garray; 2489 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2490 for (i=0; i<baij->B->cmap->n/bs; i++) { 2491 for (j=0; j<bs; j++) { 2492 garray[i*bs+j] = bs*baij->garray[i]+j; 2493 } 2494 } 2495 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2496 ierr = PetscFree(garray);CHKERRQ(ierr); 2497 CHKMEMQ; 2498 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2499 for (k=0; k<c->ncolors; k++) { 2500 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2501 for (l=0; l<c->nrows[k]; l++) { 2502 col = c->columnsforrow[k][l]; 2503 if (col >= cstart && col < cend) { 2504 /* column is in diagonal block of matrix */ 2505 colb = col - cstart; 2506 } else { 2507 /* column is in "off-processor" part */ 2508 #if defined (PETSC_USE_CTABLE) 2509 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2510 colb --; 2511 #else 2512 colb = baij->colmap[col] - 1; 2513 #endif 2514 colb = colb/bs; 2515 colb += cend - cstart; 2516 } 2517 c->vscaleforrow[k][l] = colb; 2518 } 2519 } 2520 } else if (ctype == IS_COLORING_GHOSTED) { 2521 /* Get gtol mapping */ 2522 PetscInt N = mat->cmap->N, *gtol; 2523 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2524 for (i=0; i<N; i++) gtol[i] = -1; 2525 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2526 2527 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2528 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2529 for (k=0; k<c->ncolors; k++) { 2530 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2531 for (l=0; l<c->nrows[k]; l++) { 2532 col = c->columnsforrow[k][l]; /* global column index */ 2533 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2534 } 2535 } 2536 ierr = PetscFree(gtol);CHKERRQ(ierr); 2537 } 2538 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2539 2540 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2541 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2542 
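/*
   Minimal usage sketch for the coloring context assembled above (a sketch only: it assumes the
   MatGetColoring()/MatFDColoringSetFunction() interface of this PETSc release, and J, x, ctx and
   FormFunction are hypothetical user symbols):

       ISColoring    iscoloring;
       MatFDColoring fdcoloring;
       MatStructure  flag;
       ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
       ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
       ierr = MatFDColoringSetFunction(fdcoloring,(PetscErrorCode (*)(void))FormFunction,ctx);CHKERRQ(ierr);
       ierr = MatFDColoringSetFromOptions(fdcoloring);CHKERRQ(ierr);
       ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
       ierr = MatFDColoringApply(J,fdcoloring,x,&flag,ctx);CHKERRQ(ierr);
       ierr = MatFDColoringDestroy(&fdcoloring);CHKERRQ(ierr);
*/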
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2543 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2544 CHKMEMQ; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2550 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2551 { 2552 Mat B; 2553 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2554 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2555 Mat_SeqAIJ *b; 2556 PetscErrorCode ierr; 2557 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2558 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2559 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2560 2561 PetscFunctionBegin; 2562 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2563 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2564 2565 /* ---------------------------------------------------------------- 2566 Tell every processor the number of nonzeros per row 2567 */ 2568 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2569 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2570 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2571 } 2572 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2573 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2574 displs = recvcounts + size; 2575 for (i=0; i<size; i++) { 2576 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2577 displs[i] = A->rmap->range[i]/bs; 2578 } 2579 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2580 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2581 #else 2582 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2583 #endif 2584 /* --------------------------------------------------------------- 2585 Create the sequential matrix of the same type as the local block diagonal 2586 */ 2587 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2588 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2589 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2590 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2591 b = (Mat_SeqAIJ *)B->data; 2592 2593 /*-------------------------------------------------------------------- 2594 Copy my part of matrix column indices over 2595 */ 2596 sendcount = ad->nz + bd->nz; 2597 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2598 a_jsendbuf = ad->j; 2599 b_jsendbuf = bd->j; 2600 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2601 cnt = 0; 2602 for (i=0; i<n; i++) { 2603 2604 /* put in lower diagonal portion */ 2605 m = bd->i[i+1] - bd->i[i]; 2606 while (m > 0) { 2607 /* is it above diagonal (in bd (compressed) numbering) */ 2608 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2609 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2610 m--; 2611 } 2612 2613 /* put in diagonal portion */ 2614 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2615 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2616 } 2617 2618 /* put in upper diagonal portion */ 2619 while (m-- > 0) { 2620 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2621 } 2622 } 2623 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2624 2625 /*-------------------------------------------------------------------- 2626 Gather all column indices to all processors 2627 */ 2628 for (i=0; i<size; i++) { 2629 recvcounts[i] = 0; 2630 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2631 recvcounts[i] += lens[j]; 2632 } 2633 } 2634 displs[0] = 0; 2635 for (i=1; i<size; i++) { 2636 displs[i] = displs[i-1] + recvcounts[i-1]; 2637 } 2638 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2639 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2640 #else 2641 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2642 #endif 2643 /*-------------------------------------------------------------------- 2644 Assemble the matrix into useable form (note numerical values not yet set) 2645 */ 2646 /* set the b->ilen (length of each row) values */ 2647 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2648 /* set the b->i indices */ 2649 b->i[0] = 0; 2650 for (i=1; i<=A->rmap->N/bs; i++) { 2651 b->i[i] = b->i[i-1] + lens[i-1]; 2652 } 2653 ierr = PetscFree(lens);CHKERRQ(ierr); 2654 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2657 2658 if (A->symmetric){ 2659 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2660 } else if (A->hermitian) { 2661 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2662 } else if (A->structurally_symmetric) { 2663 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2664 } 2665 *newmat = B; 2666 PetscFunctionReturn(0); 2667 } 2668 2669 #undef __FUNCT__ 2670 #define __FUNCT__ "MatSOR_MPIBAIJ" 2671 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2672 { 2673 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2674 PetscErrorCode ierr; 2675 Vec bb1 = 0; 2676 2677 PetscFunctionBegin; 2678 if (flag == SOR_APPLY_UPPER) { 2679 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2680 PetscFunctionReturn(0); 2681 } 2682 2683 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2685 } 2686 2687 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2688 if (flag & SOR_ZERO_INITIAL_GUESS) { 2689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2690 its--; 2691 } 2692 2693 while (its--) { 2694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2696 2697 /* update rhs: bb1 = bb - B*x */ 2698 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2699 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2700 2701 /* local sweep */ 2702 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2703 } 2704 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2705 if (flag & SOR_ZERO_INITIAL_GUESS) { 2706 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2707 its--; 2708 } 2709 while (its--) { 2710 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2711 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2712 2713 /* update rhs: bb1 = bb - B*x */ 2714 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2715 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2716 2717 /* local sweep */ 2718 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2719 } 2720 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2721 if (flag & SOR_ZERO_INITIAL_GUESS) { 2722 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2723 its--; 2724 } 2725 while (its--) { 2726 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2727 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2728 2729 /* update rhs: bb1 = bb - B*x */ 2730 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2731 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2732 2733 /* local sweep */ 2734 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2735 } 2736 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2737 2738 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2746 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2747 { 2748 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 2757 /* -------------------------------------------------------------------*/ 2758 static struct _MatOps MatOps_Values = { 2759 MatSetValues_MPIBAIJ, 2760 MatGetRow_MPIBAIJ, 2761 MatRestoreRow_MPIBAIJ, 2762 MatMult_MPIBAIJ, 2763 /* 4*/ MatMultAdd_MPIBAIJ, 2764 MatMultTranspose_MPIBAIJ, 2765 MatMultTransposeAdd_MPIBAIJ, 2766 0, 2767 0, 2768 0, 2769 /*10*/ 0, 2770 0, 2771 0, 2772 MatSOR_MPIBAIJ, 2773 MatTranspose_MPIBAIJ, 2774 /*15*/ MatGetInfo_MPIBAIJ, 2775 MatEqual_MPIBAIJ, 2776 MatGetDiagonal_MPIBAIJ, 2777 MatDiagonalScale_MPIBAIJ, 2778 MatNorm_MPIBAIJ, 2779 /*20*/ MatAssemblyBegin_MPIBAIJ, 2780 MatAssemblyEnd_MPIBAIJ, 2781 MatSetOption_MPIBAIJ, 2782 MatZeroEntries_MPIBAIJ, 2783 /*24*/ MatZeroRows_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*29*/ MatSetUp_MPIBAIJ, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*34*/ MatDuplicate_MPIBAIJ, 2794 0, 2795 0, 2796 0, 2797 0, 2798 /*39*/ MatAXPY_MPIBAIJ, 2799 MatGetSubMatrices_MPIBAIJ, 2800 MatIncreaseOverlap_MPIBAIJ, 2801 MatGetValues_MPIBAIJ, 2802 MatCopy_MPIBAIJ, 2803 /*44*/ 0, 2804 MatScale_MPIBAIJ, 2805 0, 2806 0, 2807 0, 2808 /*49*/ 0, 2809 0, 2810 0, 2811 0, 2812 0, 2813 /*54*/ MatFDColoringCreate_MPIBAIJ, 2814 0, 2815 MatSetUnfactored_MPIBAIJ, 2816 MatPermute_MPIBAIJ, 2817 MatSetValuesBlocked_MPIBAIJ, 2818 /*59*/ MatGetSubMatrix_MPIBAIJ, 2819 MatDestroy_MPIBAIJ, 2820 MatView_MPIBAIJ, 2821 0, 2822 0, 2823 /*64*/ 0, 2824 0, 2825 0, 2826 0, 2827 0, 2828 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2829 0, 2830 0, 2831 0, 2832 0, 2833 /*74*/ 0, 2834 MatFDColoringApply_BAIJ, 2835 0, 2836 0, 2837 0, 2838 /*79*/ 0, 2839 0, 2840 0, 2841 0, 2842 MatLoad_MPIBAIJ, 2843 /*84*/ 0, 2844 0, 2845 0, 2846 0, 2847 0, 2848 /*89*/ 0, 2849 0, 2850 0, 2851 0, 2852 0, 2853 /*94*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 
/*99*/ 0, 2859 0, 2860 0, 2861 0, 2862 0, 2863 /*104*/0, 2864 MatRealPart_MPIBAIJ, 2865 MatImaginaryPart_MPIBAIJ, 2866 0, 2867 0, 2868 /*109*/0, 2869 0, 2870 0, 2871 0, 2872 0, 2873 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2874 0, 2875 MatGetGhosts_MPIBAIJ, 2876 0, 2877 0, 2878 /*119*/0, 2879 0, 2880 0, 2881 0, 2882 0, 2883 /*124*/0, 2884 0, 2885 MatInvertBlockDiagonal_MPIBAIJ 2886 }; 2887 2888 EXTERN_C_BEGIN 2889 #undef __FUNCT__ 2890 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2891 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2892 { 2893 PetscFunctionBegin; 2894 *a = ((Mat_MPIBAIJ *)A->data)->A; 2895 PetscFunctionReturn(0); 2896 } 2897 EXTERN_C_END 2898 2899 EXTERN_C_BEGIN 2900 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2901 EXTERN_C_END 2902 2903 EXTERN_C_BEGIN 2904 #undef __FUNCT__ 2905 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2906 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2907 { 2908 PetscInt m,rstart,cstart,cend; 2909 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2910 const PetscInt *JJ=0; 2911 PetscScalar *values=0; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2916 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2917 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2918 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2919 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2920 m = B->rmap->n/bs; 2921 rstart = B->rmap->rstart/bs; 2922 cstart = B->cmap->rstart/bs; 2923 cend = B->cmap->rend/bs; 2924 2925 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2926 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2927 for (i=0; i<m; i++) { 2928 nz = ii[i+1] - ii[i]; 2929 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2930 nz_max = PetscMax(nz_max,nz); 2931 JJ = jj + ii[i]; 2932 for (j=0; j<nz; j++) { 2933 if (*JJ >= cstart) break; 2934 JJ++; 2935 } 2936 d = 0; 2937 for (; j<nz; j++) { 2938 if (*JJ++ >= cend) break; 2939 d++; 2940 } 2941 d_nnz[i] = d; 2942 o_nnz[i] = nz - d; 2943 } 2944 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2945 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2946 2947 values = (PetscScalar*)V; 2948 if (!values) { 2949 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2950 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2951 } 2952 for (i=0; i<m; i++) { 2953 PetscInt row = i + rstart; 2954 PetscInt ncols = ii[i+1] - ii[i]; 2955 const PetscInt *icols = jj + ii[i]; 2956 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2957 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2958 } 2959 2960 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 EXTERN_C_END 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2970 /*@C 2971 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2972 (the default parallel PETSc format). 
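   Example (an illustrative sketch added to this listing, not part of the original manual page):
   a process owning two local block rows, with three nonzero blocks in its first row and two in
   its second, could preallocate without supplying numerical values via

       PetscInt i[] = {0,3,5};            block-row pointers into j
       PetscInt j[] = {0,1,4, 1,2};       sorted global block-column indices
       ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,PETSC_NULL);CHKERRQ(ierr);

   Each i/j entry refers to one bs-by-bs block, so when values are passed they contain bs*bs
   scalars per block.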
2973 2974 Collective on MPI_Comm 2975 2976 Input Parameters: 2977 + A - the matrix 2978 . bs - the block size 2979 . i - the indices into j for the start of each local row (starts with zero) 2980 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2981 - v - optional values in the matrix 2982 2983 Level: developer 2984 2985 .keywords: matrix, aij, compressed row, sparse, parallel 2986 2987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2988 @*/ 2989 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2990 { 2991 PetscErrorCode ierr; 2992 2993 PetscFunctionBegin; 2994 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2995 PetscValidType(B,1); 2996 PetscValidLogicalCollectiveInt(B,bs,2); 2997 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 EXTERN_C_BEGIN 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3004 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3005 { 3006 Mat_MPIBAIJ *b; 3007 PetscErrorCode ierr; 3008 PetscInt i; 3009 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3010 3011 PetscFunctionBegin; 3012 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3013 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3014 3015 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3016 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3017 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3018 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3019 3020 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3021 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3022 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3024 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3025 3026 if (d_nnz) { 3027 for (i=0; i<B->rmap->n/bs; i++) { 3028 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3029 } 3030 } 3031 if (o_nnz) { 3032 for (i=0; i<B->rmap->n/bs; i++) { 3033 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3034 } 3035 } 3036 3037 b = (Mat_MPIBAIJ*)B->data; 3038 b->bs2 = bs*bs; 3039 b->mbs = B->rmap->n/bs; 3040 b->nbs = B->cmap->n/bs; 3041 b->Mbs = B->rmap->N/bs; 3042 b->Nbs = B->cmap->N/bs; 3043 3044 for (i=0; i<=b->size; i++) { 3045 b->rangebs[i] = B->rmap->range[i]/bs; 3046 } 3047 b->rstartbs = B->rmap->rstart/bs; 3048 b->rendbs = B->rmap->rend/bs; 3049 b->cstartbs = B->cmap->rstart/bs; 3050 b->cendbs = B->cmap->rend/bs; 3051 3052 if (!B->preallocated) { 3053 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3054 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3055 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3056 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3057 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3058 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3059 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3060 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3061 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3062 } 3063 3064 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3065 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3066 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3067 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3068 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3069 B->preallocated = PETSC_TRUE; 3070 PetscFunctionReturn(0); 3071 } 3072 EXTERN_C_END 3073 3074 EXTERN_C_BEGIN 3075 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3076 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3077 EXTERN_C_END 3078 3079 3080 EXTERN_C_BEGIN 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3083 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3084 { 3085 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3086 PetscErrorCode ierr; 3087 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3088 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3089 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3090 3091 PetscFunctionBegin; 3092 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3093 ii[0] = 0; 3094 CHKMEMQ; 3095 for (i=0; i<M; i++) { 3096 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3097 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3098 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3099 /* remove one from count of matrix has diagonal */ 3100 for (j=id[i]; j<id[i+1]; j++) { 3101 if (jd[j] == i) {ii[i+1]--;break;} 3102 } 3103 CHKMEMQ; 3104 } 3105 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3106 cnt = 0; 3107 for (i=0; i<M; i++) { 3108 for (j=io[i]; j<io[i+1]; j++) { 3109 if (garray[jo[j]] > rstart) break; 3110 jj[cnt++] = garray[jo[j]]; 3111 CHKMEMQ; 3112 } 3113 for (k=id[i]; k<id[i+1]; k++) { 3114 if (jd[k] != i) { 3115 jj[cnt++] = rstart + jd[k]; 3116 CHKMEMQ; 3117 } 3118 } 3119 for (;j<io[i+1]; j++) { 3120 jj[cnt++] = garray[jo[j]]; 3121 CHKMEMQ; 3122 } 3123 } 3124 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3125 PetscFunctionReturn(0); 3126 } 3127 EXTERN_C_END 3128 3129 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3130 EXTERN_C_BEGIN 3131 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3132 EXTERN_C_END 3133 3134 EXTERN_C_BEGIN 3135 #undef __FUNCT__ 3136 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3137 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3138 { 3139 PetscErrorCode ierr; 3140 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3141 Mat B; 3142 Mat_MPIAIJ *b; 3143 3144 PetscFunctionBegin; 3145 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3146 3147 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3148 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3149 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3150 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3151 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3152 b = (Mat_MPIAIJ*) B->data; 3153 3154 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3155 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3156 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3157 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3158 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3159 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 if (reuse == MAT_REUSE_MATRIX) { 3164 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3165 } else { 3166 *newmat = B; 3167 } 3168 PetscFunctionReturn(0); 3169 } 3170 EXTERN_C_END 3171 3172 EXTERN_C_BEGIN 3173 #if defined(PETSC_HAVE_MUMPS) 3174 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3175 #endif 3176 EXTERN_C_END 3177 3178 /*MC 3179 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3180 3181 Options Database Keys: 3182 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3183 . -mat_block_size <bs> - set the blocksize used to store the matrix 3184 - -mat_use_hash_table <fact> 3185 3186 Level: beginner 3187 3188 .seealso: MatCreateMPIBAIJ 3189 M*/ 3190 3191 EXTERN_C_BEGIN 3192 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3193 EXTERN_C_END 3194 3195 EXTERN_C_BEGIN 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "MatCreate_MPIBAIJ" 3198 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3199 { 3200 Mat_MPIBAIJ *b; 3201 PetscErrorCode ierr; 3202 PetscBool flg; 3203 3204 PetscFunctionBegin; 3205 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3206 B->data = (void*)b; 3207 3208 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3209 B->assembled = PETSC_FALSE; 3210 3211 B->insertmode = NOT_SET_VALUES; 3212 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3213 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3214 3215 /* build local table of row and column ownerships */ 3216 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3217 3218 /* build cache for off array entries formed */ 3219 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3220 b->donotstash = PETSC_FALSE; 3221 b->colmap = PETSC_NULL; 3222 b->garray = PETSC_NULL; 3223 b->roworiented = PETSC_TRUE; 3224 3225 /* stuff used in block assembly */ 3226 b->barray = 0; 3227 3228 /* stuff used for matrix vector multiply */ 3229 b->lvec = 0; 3230 b->Mvctx = 0; 3231 3232 /* stuff for MatGetRow() */ 3233 b->rowindices = 0; 3234 b->rowvalues = 0; 3235 b->getrowactive = PETSC_FALSE; 3236 3237 /* hash table stuff */ 3238 b->ht = 0; 3239 b->hd = 0; 3240 b->ht_size = 0; 3241 b->ht_flag = PETSC_FALSE; 3242 b->ht_fact = 0; 3243 b->ht_total_ct = 0; 3244 b->ht_insert_ct = 0; 3245 3246 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3247 b->ijonly = PETSC_FALSE; 3248 3249 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3250 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3251 if (flg) { 3252 PetscReal fact = 1.39; 3253 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3254 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3255 if (fact <= 1.0) fact = 1.39; 3256 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3257 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3258 } 3259 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3260 3261 #if defined(PETSC_HAVE_MUMPS) 3262 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3263 #endif 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3265 "MatConvert_MPIBAIJ_MPIAdj", 3266 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3268 "MatConvert_MPIBAIJ_MPIAIJ", 3269 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3271 "MatConvert_MPIBAIJ_MPISBAIJ", 3272 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3274 "MatStoreValues_MPIBAIJ", 3275 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3277 "MatRetrieveValues_MPIBAIJ", 3278 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3280 "MatGetDiagonalBlock_MPIBAIJ", 3281 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3283 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3284 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3286 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3287 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3289 "MatDiagonalScaleLocal_MPIBAIJ", 3290 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3291 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3292 "MatSetHashTableFactor_MPIBAIJ", 3293 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3295 "MatConvert_MPIBAIJ_MPIBSTRM", 3296 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3297 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 /*MC 3303 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3304 3305 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3306 and MATMPIBAIJ otherwise. 3307 3308 Options Database Keys: 3309 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3310 3311 Level: beginner 3312 3313 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3314 M*/ 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3318 /*@C 3319 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3320 (block compressed row). For good matrix assembly performance 3321 the user should preallocate the matrix storage by setting the parameters 3322 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3323 performance can be increased by more than a factor of 50. 3324 3325 Collective on Mat 3326 3327 Input Parameters: 3328 + A - the matrix 3329 . bs - size of block 3330 . d_nz - number of block nonzeros per block row in diagonal portion of local 3331 submatrix (same for all local rows) 3332 . d_nnz - array containing the number of block nonzeros in the various block rows 3333 in the diagonal portion of the local submatrix (possibly different for each block 3334 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3335 set it even if it is zero. 3336 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3337 submatrix (same for all local rows). 3338 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3339 off-diagonal portion of the local submatrix (possibly different for 3340 each block row) or PETSC_NULL. 3341 3342 If the *_nnz parameter is given then the *_nz parameter is ignored. 3343 3344 Options Database Keys: 3345 + -mat_block_size - size of the blocks to use 3346 - -mat_use_hash_table <fact> - use a hash table during assembly; fact is the hash table size factor 3347 3348 Notes: 3349 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3350 then it must be used on all processors that share the object for that argument. 3351 3352 Storage Information: 3353 For a square global matrix we define each processor's diagonal portion 3354 to be its local rows and the corresponding columns (a square submatrix); 3355 each processor's off-diagonal portion encompasses the remainder of the 3356 local matrix (a rectangular submatrix). 3357 3358 The user can specify preallocated storage for the diagonal part of 3359 the local submatrix with either d_nz or d_nnz (not both). Set 3360 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3361 memory allocation. Likewise, specify preallocated storage for the 3362 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3363 3364 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3365 the figure below we depict these three local rows and all columns (0-11). 3366 3367 .vb 3368 0 1 2 3 4 5 6 7 8 9 10 11 3369 ------------------- 3370 row 3 | o o o d d d o o o o o o 3371 row 4 | o o o d d d o o o o o o 3372 row 5 | o o o d d d o o o o o o 3373 ------------------- 3374 .ve 3375 3376 Thus, any entries in the d locations are stored in the d (diagonal) 3377 submatrix, and any entries in the o locations are stored in the 3378 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3379 stored simply in the MATSEQBAIJ format for compressed row storage. 3380 3381 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3382 and o_nz should indicate the number of block nonzeros per row in the o matrix.
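   For the rows depicted above, if the block size is 3 then rows 3-5 form a single local block row and
   columns 0-11 form four block columns, of which block column 1 (columns 3-5) is the diagonal block.
   Assuming every depicted location is nonzero, a sketch of the matching preallocation for that block row is
.vb
      d_nnz[0] = 1;   /* one nonzero block in the diagonal portion        */
      o_nnz[0] = 3;   /* three nonzero blocks in the off-diagonal portion */
.ve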
3383 In general, for PDE problems in which most nonzeros are near the diagonal, 3384 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3385 or you will get TERRIBLE performance; see the users' manual chapter on 3386 matrices. 3387 3388 You can call MatGetInfo() to get information on how effective the preallocation was; 3389 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3390 You can also run with the option -info and look for messages with the string 3391 malloc in them to see if additional memory allocation was needed. 3392 3393 Level: intermediate 3394 3395 .keywords: matrix, block, aij, compressed row, sparse, parallel 3396 3397 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3398 @*/ 3399 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3400 { 3401 PetscErrorCode ierr; 3402 3403 PetscFunctionBegin; 3404 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3405 PetscValidType(B,1); 3406 PetscValidLogicalCollectiveInt(B,bs,2); 3407 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3408 PetscFunctionReturn(0); 3409 } 3410 3411 #undef __FUNCT__ 3412 #define __FUNCT__ "MatCreateBAIJ" 3413 /*@C 3414 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3415 (block compressed row). For good matrix assembly performance 3416 the user should preallocate the matrix storage by setting the parameters 3417 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3418 performance can be increased by more than a factor of 50. 3419 3420 Collective on MPI_Comm 3421 3422 Input Parameters: 3423 + comm - MPI communicator 3424 . bs - size of block 3425 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3426 This value should be the same as the local size used in creating the 3427 y vector for the matrix-vector product y = Ax. 3428 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3429 This value should be the same as the local size used in creating the 3430 x vector for the matrix-vector product y = Ax. 3431 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3432 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3433 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3434 submatrix (same for all local rows) 3435 . d_nnz - array containing the number of nonzero blocks in the various block rows 3436 in the diagonal portion of the local submatrix (possibly different for each block 3437 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3438 and set it even if it is zero. 3439 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3440 submatrix (same for all local rows). 3441 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3442 off-diagonal portion of the local submatrix (possibly different for 3443 each block row) or PETSC_NULL.
A - the matrix 3447 3448 Options Database Keys: 3449 + -mat_block_size - size of the blocks to use 3450 - -mat_use_hash_table <fact> - use a hash table during assembly; fact is the hash table size factor 3451 3452 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3453 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3454 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3455 3456 Notes: 3457 If the *_nnz parameter is given then the *_nz parameter is ignored. 3458 3459 A nonzero block is any block that has 1 or more nonzeros in it. 3460 3461 The user MUST specify either the local or global matrix dimensions 3462 (possibly both). 3463 3464 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3465 then it must be used on all processors that share the object for that argument. 3466 3467 Storage Information: 3468 For a square global matrix we define each processor's diagonal portion 3469 to be its local rows and the corresponding columns (a square submatrix); 3470 each processor's off-diagonal portion encompasses the remainder of the 3471 local matrix (a rectangular submatrix). 3472 3473 The user can specify preallocated storage for the diagonal part of 3474 the local submatrix with either d_nz or d_nnz (not both). Set 3475 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3476 memory allocation. Likewise, specify preallocated storage for the 3477 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3478 3479 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3480 the figure below we depict these three local rows and all columns (0-11). 3481 3482 .vb 3483 0 1 2 3 4 5 6 7 8 9 10 11 3484 ------------------- 3485 row 3 | o o o d d d o o o o o o 3486 row 4 | o o o d d d o o o o o o 3487 row 5 | o o o d d d o o o o o o 3488 ------------------- 3489 .ve 3490 3491 Thus, any entries in the d locations are stored in the d (diagonal) 3492 submatrix, and any entries in the o locations are stored in the 3493 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3494 stored simply in the MATSEQBAIJ format for compressed row storage. 3495 3496 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3497 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3498 In general, for PDE problems in which most nonzeros are near the diagonal, 3499 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3500 or you will get TERRIBLE performance; see the users' manual chapter on 3501 matrices.
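   Example usage (a minimal sketch; the block size, local sizes and nonzero counts below are
   illustrative placeholders, not values mandated by this routine):
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       bs = 2, mlocal = 10, nlocal = 10;  /* 5 local block rows and block columns */

      ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... insert entries with MatSetValues() or MatSetValuesBlocked() ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve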
3502 3503 Level: intermediate 3504 3505 .keywords: matrix, block, aij, compressed row, sparse, parallel 3506 3507 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3508 @*/ 3509 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3510 { 3511 PetscErrorCode ierr; 3512 PetscMPIInt size; 3513 3514 PetscFunctionBegin; 3515 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3516 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3518 if (size > 1) { 3519 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3520 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3521 } else { 3522 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3523 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3524 } 3525 PetscFunctionReturn(0); 3526 } 3527 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3530 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3531 { 3532 Mat mat; 3533 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3534 PetscErrorCode ierr; 3535 PetscInt len=0; 3536 3537 PetscFunctionBegin; 3538 *newmat = 0; 3539 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3540 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3541 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3542 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3543 3544 mat->factortype = matin->factortype; 3545 mat->preallocated = PETSC_TRUE; 3546 mat->assembled = PETSC_TRUE; 3547 mat->insertmode = NOT_SET_VALUES; 3548 3549 a = (Mat_MPIBAIJ*)mat->data; 3550 mat->rmap->bs = matin->rmap->bs; 3551 a->bs2 = oldmat->bs2; 3552 a->mbs = oldmat->mbs; 3553 a->nbs = oldmat->nbs; 3554 a->Mbs = oldmat->Mbs; 3555 a->Nbs = oldmat->Nbs; 3556 3557 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3558 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3559 3560 a->size = oldmat->size; 3561 a->rank = oldmat->rank; 3562 a->donotstash = oldmat->donotstash; 3563 a->roworiented = oldmat->roworiented; 3564 a->rowindices = 0; 3565 a->rowvalues = 0; 3566 a->getrowactive = PETSC_FALSE; 3567 a->barray = 0; 3568 a->rstartbs = oldmat->rstartbs; 3569 a->rendbs = oldmat->rendbs; 3570 a->cstartbs = oldmat->cstartbs; 3571 a->cendbs = oldmat->cendbs; 3572 3573 /* hash table stuff */ 3574 a->ht = 0; 3575 a->hd = 0; 3576 a->ht_size = 0; 3577 a->ht_flag = oldmat->ht_flag; 3578 a->ht_fact = oldmat->ht_fact; 3579 a->ht_total_ct = 0; 3580 a->ht_insert_ct = 0; 3581 3582 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3583 if (oldmat->colmap) { 3584 #if defined (PETSC_USE_CTABLE) 3585 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3586 #else 3587 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3588 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3589 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3590 #endif 3591 } else a->colmap = 0; 3592 3593 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3594 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3595 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3596 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3597 } else a->garray = 0; 3598 3599 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3600 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3601 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3602 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3603 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3604 3605 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3606 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3607 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3609 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3610 *newmat = mat; 3611 3612 PetscFunctionReturn(0); 3613 } 3614 3615 #undef __FUNCT__ 3616 #define __FUNCT__ "MatLoad_MPIBAIJ" 3617 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3618 { 3619 PetscErrorCode ierr; 3620 int fd; 3621 PetscInt i,nz,j,rstart,rend; 3622 PetscScalar *vals,*buf; 3623 MPI_Comm comm = ((PetscObject)viewer)->comm; 3624 MPI_Status status; 3625 PetscMPIInt rank,size,maxnz; 3626 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3627 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3628 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3629 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3630 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3631 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3632 3633 PetscFunctionBegin; 3634 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3635 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3636 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3637 3638 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3639 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3640 if (!rank) { 3641 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3642 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3643 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3644 } 3645 3646 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3647 3648 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3649 M = header[1]; N = header[2]; 3650 3651 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3652 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3653 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3654 3655 /* If global sizes are set, check if they are consistent with that given in the file */ 3656 if (sizesset) { 3657 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3658 } 3659 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3660 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3661 3662 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3663 3664 /* 3665 This code adds extra rows to make sure the number of rows is 3666 divisible by the blocksize 3667 */ 3668 Mbs = M/bs; 3669 extra_rows = bs - M + bs*Mbs; 3670 if (extra_rows == bs) extra_rows = 0; 3671 else Mbs++; 3672 if (extra_rows && !rank) { 3673 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3674 } 3675 3676 /* determine ownership of all rows */ 3677 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3678 mbs = Mbs/size + ((Mbs % size) > rank); 3679 m = mbs*bs; 3680 } else { /* User set */ 3681 m = newmat->rmap->n; 3682 mbs = m/bs; 3683 } 3684 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3685 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3686 3687 /* process 0 needs enough room for process with most rows */ 3688 if (!rank) { 3689 mmax = rowners[1]; 3690 for (i=2; i<=size; i++) { 3691 mmax = PetscMax(mmax,rowners[i]); 3692 } 3693 mmax*=bs; 3694 } else mmax = m; 3695 3696 rowners[0] = 0; 3697 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3698 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3699 rstart = rowners[rank]; 3700 rend = rowners[rank+1]; 3701 3702 /* distribute row lengths to all processors */ 3703 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3704 if (!rank) { 3705 mend = m; 3706 if (size == 1) mend = mend - extra_rows; 3707 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3708 for (j=mend; j<m; j++) locrowlens[j] = 1; 3709 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3710 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3711 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3712 for (j=0; j<m; j++) { 3713 procsnz[0] += locrowlens[j]; 3714 } 3715 for (i=1; i<size; i++) { 3716 mend = browners[i+1] - browners[i]; 3717 if (i == size-1) mend = mend - extra_rows; 3718 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3719 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3720 /* calculate the number of nonzeros on each processor */ 3721 for (j=0; j<browners[i+1]-browners[i]; j++) { 3722 procsnz[i] += rowlengths[j]; 3723 } 3724 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3727 } else { 3728 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3729 } 3730 3731 if (!rank) { 3732 /* determine max buffer needed and allocate it */ 3733 maxnz = procsnz[0]; 3734 for (i=1; i<size; i++) { 3735 maxnz = PetscMax(maxnz,procsnz[i]); 3736 } 3737 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3738 3739 /* read in my part of the matrix column indices */ 3740 nz = procsnz[0]; 3741 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3742 mycols = ibuf; 3743 if (size == 1) nz -= extra_rows; 3744 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3745 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3746 3747 /* read in every ones (except the last) and ship off */ 3748 for (i=1; i<size-1; i++) { 3749 nz = procsnz[i]; 3750 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3751 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3752 } 3753 /* read in the stuff for the last proc */ 3754 if (size != 1) { 3755 nz = procsnz[size-1] - extra_rows; /* the extra rows 
are not on the disk */ 3756 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3757 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3758 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3759 } 3760 ierr = PetscFree(cols);CHKERRQ(ierr); 3761 } else { 3762 /* determine buffer space needed for message */ 3763 nz = 0; 3764 for (i=0; i<m; i++) { 3765 nz += locrowlens[i]; 3766 } 3767 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3768 mycols = ibuf; 3769 /* receive message of column indices*/ 3770 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3771 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3772 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3773 } 3774 3775 /* loop over local rows, determining number of off diagonal entries */ 3776 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3777 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3778 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3779 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3780 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3781 rowcount = 0; nzcount = 0; 3782 for (i=0; i<mbs; i++) { 3783 dcount = 0; 3784 odcount = 0; 3785 for (j=0; j<bs; j++) { 3786 kmax = locrowlens[rowcount]; 3787 for (k=0; k<kmax; k++) { 3788 tmp = mycols[nzcount++]/bs; 3789 if (!mask[tmp]) { 3790 mask[tmp] = 1; 3791 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3792 else masked1[dcount++] = tmp; 3793 } 3794 } 3795 rowcount++; 3796 } 3797 3798 dlens[i] = dcount; 3799 odlens[i] = odcount; 3800 3801 /* zero out the mask elements we set */ 3802 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3803 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3804 } 3805 3806 3807 if (!sizesset) { 3808 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3809 } 3810 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3811 3812 if (!rank) { 3813 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3814 /* read in my part of the matrix numerical values */ 3815 nz = procsnz[0]; 3816 vals = buf; 3817 mycols = ibuf; 3818 if (size == 1) nz -= extra_rows; 3819 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3820 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3821 3822 /* insert into matrix */ 3823 jj = rstart*bs; 3824 for (i=0; i<m; i++) { 3825 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3826 mycols += locrowlens[i]; 3827 vals += locrowlens[i]; 3828 jj++; 3829 } 3830 /* read in other processors (except the last one) and ship out */ 3831 for (i=1; i<size-1; i++) { 3832 nz = procsnz[i]; 3833 vals = buf; 3834 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3835 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3836 } 3837 /* the last proc */ 3838 if (size != 1){ 3839 nz = procsnz[i] - extra_rows; 3840 vals = buf; 3841 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3842 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3843 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3844 } 3845 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3846 } else { 3847 /* receive numeric values */ 3848 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3849 3850 /* receive message of values */ 3851 vals = buf; 3852 mycols = ibuf; 3853 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3854 3855 /* insert into matrix */ 3856 jj = rstart*bs; 3857 for (i=0; i<m; i++) { 3858 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3859 mycols += locrowlens[i]; 3860 vals += locrowlens[i]; 3861 jj++; 3862 } 3863 } 3864 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3865 ierr = PetscFree(buf);CHKERRQ(ierr); 3866 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3867 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3868 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3869 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3870 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 3873 PetscFunctionReturn(0); 3874 } 3875 3876 #undef __FUNCT__ 3877 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3878 /*@ 3879 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3880 3881 Input Parameters: 3882 + mat - the matrix 3883 - fact - factor used in computing the size of the hash table 3884 3885 Not Collective, each process can use a different factor 3886 3887 Level: advanced 3888 3889 Notes: 3890 This can also be set by the command line option: -mat_use_hash_table <fact> 3891 3892 .keywords: matrix, hashtable, factor, HT 3893 3894 .seealso: MatSetOption() 3895 @*/ 3896 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3897 { 3898 PetscErrorCode ierr; 3899 3900 PetscFunctionBegin; 3901 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3902 PetscFunctionReturn(0); 3903 } 3904 3905 EXTERN_C_BEGIN 3906 #undef __FUNCT__ 3907 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3908 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3909 { 3910 Mat_MPIBAIJ *baij; 3911 3912 PetscFunctionBegin; 3913 baij = (Mat_MPIBAIJ*)mat->data; 3914 baij->ht_fact = fact; 3915 PetscFunctionReturn(0); 3916 } 3917 EXTERN_C_END 3918 3919 #undef __FUNCT__ 3920 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3921 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3922 { 3923 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3924 PetscFunctionBegin; 3925 *Ad = a->A; 3926 *Ao = a->B; 3927 *colmap = a->garray; 3928 PetscFunctionReturn(0); 3929 } 3930 3931 /* 3932 Special version for direct calls from Fortran (to eliminate two function call overheads) 3933 */ 3934 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3935 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3936 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3937 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3938 #endif 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3942 /*@C 3943 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3944 3945 Collective on Mat 3946 3947 Input Parameters: 3948 + mat - the matrix 3949 . min - number of input rows 3950 . im - input rows 3951 . nin - number of input columns 3952 . in - input columns 3953 . v - numerical values input 3954 - addvin - INSERT_VALUES or ADD_VALUES 3955 3956 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
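   For reference, a sketch of the equivalent standard call made from C (the block row/column
   indices and values below are illustrative placeholders; with bs = 2 the array holds one
   bs*bs block, row-oriented by default):
.vb
      PetscInt    brow = 0, bcol = 2;
      PetscScalar vals[4] = {1.0, 2.0, 3.0, 4.0};
      ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
.ve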
3957 3958 Level: advanced 3959 3960 .seealso: MatSetValuesBlocked() 3961 @*/ 3962 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3963 { 3964 /* convert input arguments to C version */ 3965 Mat mat = *matin; 3966 PetscInt m = *min, n = *nin; 3967 InsertMode addv = *addvin; 3968 3969 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3970 const MatScalar *value; 3971 MatScalar *barray=baij->barray; 3972 PetscBool roworiented = baij->roworiented; 3973 PetscErrorCode ierr; 3974 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3975 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3976 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3977 3978 PetscFunctionBegin; 3979 /* tasks normally handled by MatSetValuesBlocked() */ 3980 if (mat->insertmode == NOT_SET_VALUES) { 3981 mat->insertmode = addv; 3982 } 3983 #if defined(PETSC_USE_DEBUG) 3984 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3985 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3986 #endif 3987 if (mat->assembled) { 3988 mat->was_assembled = PETSC_TRUE; 3989 mat->assembled = PETSC_FALSE; 3990 } 3991 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3992 3993 3994 if (!barray) { 3995 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3996 baij->barray = barray; 3997 } 3998 3999 if (roworiented) { 4000 stepval = (n-1)*bs; 4001 } else { 4002 stepval = (m-1)*bs; 4003 } 4004 for (i=0; i<m; i++) { 4005 if (im[i] < 0) continue; 4006 #if defined(PETSC_USE_DEBUG) 4007 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4008 #endif 4009 if (im[i] >= rstart && im[i] < rend) { 4010 row = im[i] - rstart; 4011 for (j=0; j<n; j++) { 4012 /* If NumCol = 1 then a copy is not required */ 4013 if ((roworiented) && (n == 1)) { 4014 barray = (MatScalar*)v + i*bs2; 4015 } else if ((!roworiented) && (m == 1)) { 4016 barray = (MatScalar*)v + j*bs2; 4017 } else { /* Here a copy is required */ 4018 if (roworiented) { 4019 value = v + i*(stepval+bs)*bs + j*bs; 4020 } else { 4021 value = v + j*(stepval+bs)*bs + i*bs; 4022 } 4023 for (ii=0; ii<bs; ii++,value+=stepval) { 4024 for (jj=0; jj<bs; jj++) { 4025 *barray++ = *value++; 4026 } 4027 } 4028 barray -=bs2; 4029 } 4030 4031 if (in[j] >= cstart && in[j] < cend){ 4032 col = in[j] - cstart; 4033 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4034 } 4035 else if (in[j] < 0) continue; 4036 #if defined(PETSC_USE_DEBUG) 4037 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4038 #endif 4039 else { 4040 if (mat->was_assembled) { 4041 if (!baij->colmap) { 4042 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4043 } 4044 4045 #if defined(PETSC_USE_DEBUG) 4046 #if defined (PETSC_USE_CTABLE) 4047 { PetscInt data; 4048 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4049 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4050 } 4051 #else 4052 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4053 #endif 4054 #endif 4055 #if defined (PETSC_USE_CTABLE) 4056 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4057 col = (col - 1)/bs; 4058 
#else 4059 col = (baij->colmap[in[j]] - 1)/bs; 4060 #endif 4061 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4062 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4063 col = in[j]; 4064 } 4065 } 4066 else col = in[j]; 4067 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4068 } 4069 } 4070 } else { 4071 if (!baij->donotstash) { 4072 if (roworiented) { 4073 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4074 } else { 4075 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4076 } 4077 } 4078 } 4079 } 4080 4081 /* task normally handled by MatSetValuesBlocked() */ 4082 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4083 PetscFunctionReturn(0); 4084 } 4085 4086 #undef __FUNCT__ 4087 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4088 /*@ 4089 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain in standard 4090 CSR format the local rows. 4091 4092 Collective on MPI_Comm 4093 4094 Input Parameters: 4095 + comm - MPI communicator 4096 . bs - the block size, only a block size of 1 is supported 4097 . m - number of local rows (Cannot be PETSC_DECIDE) 4098 . n - This value should be the same as the local size used in creating the 4099 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4100 calculated if N is given) For square matrices n is almost always m. 4101 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4102 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4103 . i - row indices; that is, the indices into j for the start of each local row (starts with zero) 4104 . j - column indices 4105 - a - matrix values 4106 4107 Output Parameter: 4108 . mat - the matrix 4109 4110 Level: intermediate 4111 4112 Notes: 4113 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4114 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4115 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4116 4117 The i and j indices are 0 based, and the i array contains the offsets into the local j array. 4118 4119 .keywords: matrix, aij, compressed row, sparse, parallel 4120 4121 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4122 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4123 @*/ 4124 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4125 { 4126 PetscErrorCode ierr; 4127 4128 4129 PetscFunctionBegin; 4130 if (i[0]) { 4131 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4132 } 4133 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4134 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4135 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4136 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4137 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4138 PetscFunctionReturn(0); 4139 } 4140
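/*
   Example usage of MatCreateMPIBAIJWithArrays() (an illustrative sketch, not part of the
   library source): each process passes its local rows in CSR form with 0-based indices.
   The arrays below are placeholders describing two local rows with one entry each, in
   global columns 0 and 1, using the only supported block size bs = 1.

      PetscInt       i[3] = {0,1,2};       row offsets into j[] and a[], of length m+1
      PetscInt       j[2] = {0,1};         global column indices for each row
      PetscScalar    a[2] = {2.0,-1.0};    values matching j[]
      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                        i,j,a,&A);CHKERRQ(ierr);
*/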