#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
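
     An assumed illustration (not part of the implementation): with bs = 2, if the
   third off-diagonal block column owned by this process (i = 2) corresponds to
   global block column 7 (garray[2] == 7), then colmap[7] is set to i*bs+1 = 5;
   with PETSC_USE_CTABLE the table instead maps the key 7+1 to the value 5.  A
   stored value of 0 (or a failed table lookup) means that global block column is
   not present locally.  Callers such as MatSetValues_MPIBAIJ() therefore translate
   a global point column in[j] into a local point column of B with

       col = baij->colmap[in[j]/bs] - 1;
       if (col < 0)  ... the block column is not yet present locally ...
       else col += in[j]%bs;

   where the +1/-1 shift lets 0 act as the "not present" marker.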
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr =
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba=b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray=baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) { 296 stepval = (n-1)*bs; 297 } else { 298 stepval = (m-1)*bs; 299 } 300 for (i=0; i<m; i++) { 301 if (im[i] < 0) continue; 302 #if defined(PETSC_USE_DEBUG) 303 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 304 #endif 305 if (im[i] >= rstart && im[i] < rend) { 306 row = im[i] - rstart; 307 for (j=0; j<n; j++) { 308 /* If NumCol = 1 then a copy is not required */ 309 if ((roworiented) && (n == 1)) { 310 barray = (MatScalar*)v + i*bs2; 311 } else if ((!roworiented) && (m == 1)) { 312 barray = (MatScalar*)v + j*bs2; 313 } else { /* Here a copy is required */ 314 if (roworiented) { 315 value = v + (i*(stepval+bs) + j)*bs; 316 } else { 317 value = v + (j*(stepval+bs) + i)*bs; 318 } 319 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 320 for (jj=0; jj<bs; jj++) { 321 barray[jj] = value[jj]; 322 } 323 barray += bs; 324 } 325 barray -= bs2; 326 } 327 328 if (in[j] >= cstart && in[j] < cend) { 329 col = in[j] - cstart; 330 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 331 } 332 else if (in[j] < 0) continue; 333 #if defined(PETSC_USE_DEBUG) 334 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 335 #endif 336 else { 337 if (mat->was_assembled) { 338 if (!baij->colmap) { 339 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 340 } 341 342 #if defined(PETSC_USE_DEBUG) 343 #if defined (PETSC_USE_CTABLE) 344 { PetscInt data; 345 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 346 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 347 } 348 #else 349 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 350 #endif 351 #endif 352 #if defined (PETSC_USE_CTABLE) 353 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 354 col = (col - 1)/bs; 355 #else 356 col = (baij->colmap[in[j]] - 1)/bs; 357 #endif 358 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 359 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360 col = in[j]; 361 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 362 } else col = in[j]; 363 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 364 } 365 } 366 } else { 367 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process 
row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat
mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 if (roworiented) { 480 stepval = (n-1)*bs; 481 } else { 482 stepval = (m-1)*bs; 483 } 484 for (i=0; i<m; i++) { 485 #if defined(PETSC_USE_DEBUG) 486 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 487 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 488 #endif 489 row = im[i]; 490 v_t = v + i*nbs2; 491 if (row >= rstart && row < rend) { 492 for (j=0; j<n; j++) { 493 col = in[j]; 494 495 /* Look up into the Hash Table */ 496 key = row*Nbs+col+1; 497 h1 = HASH(size,key,tmp); 498 499 idx = h1; 500 #if defined(PETSC_USE_DEBUG) 501 total_ct++; 502 insert_ct++; 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #else 511 if (HT[idx] != key) { 512 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 513 if (idx == size) { 514 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 515 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 516 } 517 } 518 #endif 519 baij_a = HD[idx]; 520 if (roworiented) { 521 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 522 /* value = v + (i*(stepval+bs)+j)*bs; */ 523 value = v_t; 524 v_t += bs; 525 if (addv == ADD_VALUES) { 526 for (ii=0; ii<bs; ii++,value+=stepval) { 527 for (jj=ii; jj<bs2; jj+=bs) { 528 baij_a[jj] += *value++; 529 } 530 } 531 } else { 532 for (ii=0; ii<bs; ii++,value+=stepval) { 533 for (jj=ii; jj<bs2; jj+=bs) { 534 baij_a[jj] = *value++; 535 } 536 } 537 } 538 } else { 539 value = v + j*(stepval+bs)*bs + i*bs; 540 if (addv == ADD_VALUES) { 541 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 542 for (jj=0; jj<bs; jj++) { 543 baij_a[jj] += *value++; 544 } 545 } 546 } else { 547 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 548 for (jj=0; jj<bs; jj++) { 549 baij_a[jj] = *value++; 550 } 551 } 552 } 553 } 554 } 555 } else { 556 if (!baij->donotstash) { 557 if (roworiented) { 558 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 559 } else { 560 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 #if defined(PETSC_USE_DEBUG) 566 baij->ht_total_ct = total_ct; 567 baij->ht_insert_ct = insert_ct; 568 #endif 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatGetValues_MPIBAIJ" 574 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 575 { 576 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 577 PetscErrorCode ierr; 578 PetscInt 
bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 579 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 580 581 PetscFunctionBegin; 582 for (i=0; i<m; i++) { 583 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 584 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 585 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 586 row = idxm[i] - bsrstart; 587 for (j=0; j<n; j++) { 588 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 589 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 590 if (idxn[j] >= bscstart && idxn[j] < bscend) { 591 col = idxn[j] - bscstart; 592 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 593 } else { 594 if (!baij->colmap) { 595 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 596 } 597 #if defined (PETSC_USE_CTABLE) 598 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 599 data --; 600 #else 601 data = baij->colmap[idxn[j]/bs]-1; 602 #endif 603 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 604 else { 605 col = data + idxn[j]%bs; 606 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } 608 } 609 } 610 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 #undef __FUNCT__ 616 #define __FUNCT__ "MatNorm_MPIBAIJ" 617 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 618 { 619 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 620 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 621 PetscErrorCode ierr; 622 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 623 PetscReal sum = 0.0; 624 MatScalar *v; 625 626 PetscFunctionBegin; 627 if (baij->size == 1) { 628 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 629 } else { 630 if (type == NORM_FROBENIUS) { 631 v = amat->a; 632 nz = amat->nz*bs2; 633 for (i=0; i<nz; i++) { 634 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 635 } 636 v = bmat->a; 637 nz = bmat->nz*bs2; 638 for (i=0; i<nz; i++) { 639 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 640 } 641 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 642 *nrm = PetscSqrtReal(*nrm); 643 } else if (type == NORM_1) { /* max column sum */ 644 PetscReal *tmp,*tmp2; 645 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 646 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 647 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 648 v = amat->a; jj = amat->j; 649 for (i=0; i<amat->nz; i++) { 650 for (j=0; j<bs; j++) { 651 col = bs*(cstart + *jj) + j; /* column index */ 652 for (row=0; row<bs; row++) { 653 tmp[col] += PetscAbsScalar(*v); v++; 654 } 655 } 656 jj++; 657 } 658 v = bmat->a; jj = bmat->j; 659 for (i=0; i<bmat->nz; i++) { 660 for (j=0; j<bs; j++) { 661 col = bs*garray[*jj] + j; 662 for (row=0; row<bs; row++) { 663 tmp[col] += PetscAbsScalar(*v); v++; 664 } 665 } 666 jj++; 667 } 668 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 669 *nrm = 0.0; 670 for (j=0; j<mat->cmap->N; j++) { 671 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 
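      /* explanatory note: after the MPI_Allreduce above, tmp2[] holds the global absolute column sums;
         their maximum, taken in this loop, is the matrix 1-norm */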
672 } 673 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 674 } else if (type == NORM_INFINITY) { /* max row sum */ 675 PetscReal *sums; 676 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 677 sum = 0.0; 678 for (j=0; j<amat->mbs; j++) { 679 for (row=0; row<bs; row++) sums[row] = 0.0; 680 v = amat->a + bs2*amat->i[j]; 681 nz = amat->i[j+1]-amat->i[j]; 682 for (i=0; i<nz; i++) { 683 for (col=0; col<bs; col++) { 684 for (row=0; row<bs; row++) { 685 sums[row] += PetscAbsScalar(*v); v++; 686 } 687 } 688 } 689 v = bmat->a + bs2*bmat->i[j]; 690 nz = bmat->i[j+1]-bmat->i[j]; 691 for (i=0; i<nz; i++) { 692 for (col=0; col<bs; col++) { 693 for (row=0; row<bs; row++) { 694 sums[row] += PetscAbsScalar(*v); v++; 695 } 696 } 697 } 698 for (row=0; row<bs; row++) { 699 if (sums[row] > sum) sum = sums[row]; 700 } 701 } 702 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 703 ierr = PetscFree(sums);CHKERRQ(ierr); 704 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 705 } 706 PetscFunctionReturn(0); 707 } 708 709 /* 710 Creates the hash table, and sets the table 711 This table is created only once. 712 If new entried need to be added to the matrix 713 then the hash table has to be destroyed and 714 recreated. 715 */ 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 718 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 719 { 720 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 721 Mat A = baij->A,B=baij->B; 722 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 723 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 724 PetscErrorCode ierr; 725 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 726 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 727 PetscInt *HT,key; 728 MatScalar **HD; 729 PetscReal tmp; 730 #if defined(PETSC_USE_INFO) 731 PetscInt ct=0,max=0; 732 #endif 733 734 PetscFunctionBegin; 735 if (baij->ht) PetscFunctionReturn(0); 736 737 baij->ht_size = (PetscInt)(factor*nz); 738 ht_size = baij->ht_size; 739 740 /* Allocate Memory for Hash Table */ 741 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 742 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 743 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 744 HD = baij->hd; 745 HT = baij->ht; 746 747 /* Loop Over A */ 748 for (i=0; i<a->mbs; i++) { 749 for (j=ai[i]; j<ai[i+1]; j++) { 750 row = i+rstart; 751 col = aj[j]+cstart; 752 753 key = row*Nbs + col + 1; 754 h1 = HASH(ht_size,key,tmp); 755 for (k=0; k<ht_size; k++) { 756 if (!HT[(h1+k)%ht_size]) { 757 HT[(h1+k)%ht_size] = key; 758 HD[(h1+k)%ht_size] = a->a + j*bs2; 759 break; 760 #if defined(PETSC_USE_INFO) 761 } else { 762 ct++; 763 #endif 764 } 765 } 766 #if defined(PETSC_USE_INFO) 767 if (k> max) max = k; 768 #endif 769 } 770 } 771 /* Loop Over B */ 772 for (i=0; i<b->mbs; i++) { 773 for (j=bi[i]; j<bi[i+1]; j++) { 774 row = i+rstart; 775 col = garray[bj[j]]; 776 key = row*Nbs + col + 1; 777 h1 = HASH(ht_size,key,tmp); 778 for (k=0; k<ht_size; k++) { 779 if (!HT[(h1+k)%ht_size]) { 780 HT[(h1+k)%ht_size] = key; 781 HD[(h1+k)%ht_size] = b->a + j*bs2; 782 break; 783 #if defined(PETSC_USE_INFO) 784 } else { 785 ct++; 786 #endif 787 } 788 } 789 #if defined(PETSC_USE_INFO) 790 if (k> max) max = k; 791 #endif 792 } 793 } 794 795 /* Print Summary */ 796 #if defined(PETSC_USE_INFO) 797 for (i=0,j=0; i<ht_size; 
i++) { 798 if (HT[i]) {j++;} 799 } 800 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 801 #endif 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 807 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 808 { 809 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 810 PetscErrorCode ierr; 811 PetscInt nstash,reallocs; 812 InsertMode addv; 813 814 PetscFunctionBegin; 815 if (baij->donotstash || mat->nooffprocentries) { 816 PetscFunctionReturn(0); 817 } 818 819 /* make sure all processors are either in INSERTMODE or ADDMODE */ 820 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 821 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 822 mat->insertmode = addv; /* in case this processor had no cache */ 823 824 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 825 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 826 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 827 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 828 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 829 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 835 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 836 { 837 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 838 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 839 PetscErrorCode ierr; 840 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 841 PetscInt *row,*col; 842 PetscBool r1,r2,r3,other_disassembled; 843 MatScalar *val; 844 InsertMode addv = mat->insertmode; 845 PetscMPIInt n; 846 847 PetscFunctionBegin; 848 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 849 if (!baij->donotstash && !mat->nooffprocentries) { 850 while (1) { 851 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 852 if (!flg) break; 853 854 for (i=0; i<n;) { 855 /* Now identify the consecutive vals belonging to the same row */ 856 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 857 if (j < n) ncols = j-i; 858 else ncols = n-i; 859 /* Now assemble all these values with a single function call */ 860 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 861 i = j; 862 } 863 } 864 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 865 /* Now process the block-stash. 
Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) { 952 MatInfo info; 953 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 954 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 957 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 958 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 960 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 962 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 965 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } else if (format == PETSC_VIEWER_ASCII_INFO) { 968 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 971 PetscFunctionReturn(0); 972 } 973 } 974 975 if (isdraw) { 976 PetscDraw draw; 977 PetscBool isnull; 978 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 979 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 980 } 981 982 if (size == 1) { 983 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 984 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 985 } else { 986 /* assemble the entire matrix onto first processor. */ 987 Mat A; 988 Mat_SeqBAIJ *Aloc; 989 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 990 MatScalar *a; 991 992 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 993 /* Perhaps this should be the type of mat? 
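       (Explanatory note: the gathered copy A is given all M rows and N columns locally
        on rank 0 and zero local rows elsewhere, so after assembly rank 0's diagonal
        block holds the entire matrix; the singleton viewer below then prints only
        that block.)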
*/ 994 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 995 if (!rank) { 996 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 997 } else { 998 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 999 } 1000 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1001 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1002 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1003 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1004 1005 /* copy over the A part */ 1006 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1007 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1008 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1009 1010 for (i=0; i<mbs; i++) { 1011 rvals[0] = bs*(baij->rstartbs + i); 1012 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1013 for (j=ai[i]; j<ai[i+1]; j++) { 1014 col = (baij->cstartbs+aj[j])*bs; 1015 for (k=0; k<bs; k++) { 1016 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1017 col++; a += bs; 1018 } 1019 } 1020 } 1021 /* copy over the B part */ 1022 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1023 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = baij->garray[aj[j]]*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 ierr = PetscFree(rvals);CHKERRQ(ierr); 1036 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 /* 1039 Everyone has to call to draw the matrix since the graphics waits are 1040 synchronized across all processors that share the PetscDraw object 1041 */ 1042 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1043 if (!rank) { 1044 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1045 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1046 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1047 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1048 } 1049 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1050 ierr = MatDestroy(&A);CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1058 { 1059 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1060 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1061 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1062 PetscErrorCode ierr; 1063 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1064 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1065 int fd; 1066 PetscScalar *column_values; 1067 FILE *file; 1068 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1069 PetscInt message_count,flowcontrolcount; 1070 1071 PetscFunctionBegin; 1072 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1073 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1074 nz = bs2*(A->nz + B->nz); 1075 rlen = mat->rmap->n; 1076 if (!rank) { 1077 header[0] = MAT_FILE_CLASSID; 1078 header[1] = mat->rmap->N; 1079 
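    /* binary header layout: [MAT_FILE_CLASSID, global rows, global columns, global nonzeros];
       header[3] is filled below by an MPI_Reduce over the local counts nz */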
header[2] = mat->cmap->N; 1080 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1081 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1082 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1083 /* get largest number of rows any processor has */ 1084 range = mat->rmap->range; 1085 for (i=1; i<size; i++) { 1086 rlen = PetscMax(rlen,range[i+1] - range[i]); 1087 } 1088 } else { 1089 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1090 } 1091 1092 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1093 /* compute lengths of each row */ 1094 for (i=0; i<a->mbs; i++) { 1095 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1096 } 1097 /* store the row lengths to the file */ 1098 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1099 if (!rank) { 1100 MPI_Status status; 1101 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1102 rlen = (range[1] - range[0])/bs; 1103 for (i=0; i<rlen; i++) { 1104 for (j=0; j<bs; j++) { 1105 row_lens[i*bs+j] = bs*crow_lens[i]; 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1109 for (i=1; i<size; i++) { 1110 rlen = (range[i+1] - range[i])/bs; 1111 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1112 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1113 for (k=0; k<rlen; k++) { 1114 for (j=0; j<bs; j++) { 1115 row_lens[k*bs+j] = bs*crow_lens[k]; 1116 } 1117 } 1118 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1119 } 1120 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1121 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1122 } else { 1123 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1124 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1125 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1126 } 1127 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1128 1129 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1130 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1131 the communication needed for the nonzero values */ 1132 nzmax = nz; /* space a largest processor needs */ 1133 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1134 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1135 cnt = 0; 1136 for (i=0; i<a->mbs; i++) { 1137 pcnt = cnt; 1138 for (j=B->i[i]; j<B->i[i+1]; j++) { 1139 if ((col = garray[B->j[j]]) > cstart) break; 1140 for (l=0; l<bs; l++) { 1141 column_indices[cnt++] = bs*col+l; 1142 } 1143 } 1144 for (k=A->i[i]; k<A->i[i+1]; k++) { 1145 for (l=0; l<bs; l++) { 1146 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1147 } 1148 } 1149 for (; j<B->i[i+1]; j++) { 1150 for (l=0; l<bs; l++) { 1151 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1152 } 1153 } 1154 len = cnt - pcnt; 1155 for (k=1; k<bs; k++) { 1156 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1157 cnt += len; 1158 } 1159 } 1160 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1161 1162 /* store the columns to the file */ 1163 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1164 if (!rank) { 1165 MPI_Status status; 1166 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1167 for (i=1; i<size; i++) { 1168 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1169 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1170 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1171 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1172 } 1173 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1174 } else { 1175 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1176 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1177 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1178 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1179 } 1180 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1181 1182 /* load up the numerical values */ 1183 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1184 cnt = 0; 1185 for (i=0; i<a->mbs; i++) { 1186 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1187 for (j=B->i[i]; j<B->i[i+1]; j++) { 1188 if (garray[B->j[j]] > cstart) break; 1189 for (l=0; l<bs; l++) { 1190 for (ll=0; ll<bs; ll++) { 1191 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1192 } 1193 } 1194 cnt += bs; 1195 } 1196 for (k=A->i[i]; k<A->i[i+1]; k++) { 1197 for (l=0; l<bs; l++) { 1198 for (ll=0; ll<bs; ll++) { 1199 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1200 } 1201 } 1202 cnt += bs; 1203 } 1204 for (; j<B->i[i+1]; j++) { 1205 for (l=0; l<bs; l++) { 1206 for (ll=0; ll<bs; ll++) { 1207 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1208 } 1209 } 1210 cnt += bs; 1211 } 1212 cnt += (bs-1)*rlen; 1213 } 1214 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1215 1216 /* store the column values to the file */ 1217 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1218 if (!rank) { 1219 MPI_Status status; 1220 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1221 for (i=1; i<size; i++) { 1222 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1223 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1224 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1231 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_values);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1237 if (file) { 1238 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatView_MPIBAIJ" 1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool iascii,isdraw,issocket,isbinary; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1254 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1255 if (iascii || isdraw || issocket) { 1256 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1257 } else if (isbinary) { 1258 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1266 { 1267 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1268 PetscErrorCode ierr; 1269 1270 PetscFunctionBegin; 1271 #if defined(PETSC_USE_LOG) 1272 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1273 #endif 1274 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1275 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1277 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1278 #if defined (PETSC_USE_CTABLE) 1279 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1280 #else 1281 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1282 #endif 1283 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1284 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1285 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1286 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1287 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1288 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1289 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1290 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1291 1292 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1294 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1365 /* values actually were received in the Begin() but we need to call this nop */ 1366 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1367 } 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1374 { 1375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1376 PetscErrorCode ierr; 1377 1378 PetscFunctionBegin; 1379 /* do nondiagonal part */ 1380 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1381 /* send it on its way */ 1382 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 /* do local part */ 1384 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1385 /* receive remote parts: note this assumes the values are not actually */ 1386 /* inserted in yy until the next line, which is true for my implementation*/ 1387 /* but is not perhaps always true. */ 1388 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 /* 1393 This only works correctly for square matrices where the subblock A->A is the 1394 diagonal block 1395 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1399 { 1400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1401 PetscErrorCode ierr; 1402 1403 PetscFunctionBegin; 1404 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1405 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatScale_MPIBAIJ" 1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1414 PetscErrorCode ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1418 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1425 { 1426 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1427 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1428 PetscErrorCode ierr; 1429 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1430 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1431 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1432 1433 PetscFunctionBegin; 1434 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1435 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1436 mat->getrowactive = PETSC_TRUE; 1437 1438 if (!mat->rowvalues && (idx || v)) { 1439 /* 1440 allocate enough space to hold information from the longest row. 
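       Explanatory note: the scan below computes max, the largest number of block
       entries in any block row, counting both the diagonal part A and the
       off-diagonal part B; rowvalues and rowindices are then each allocated with
       max*bs2 entries, enough to hold the expanded point values and column indices
       of any single row.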
1441 */ 1442 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1443 PetscInt max = 1,mbs = mat->mbs,tmp; 1444 for (i=0; i<mbs; i++) { 1445 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1446 if (max < tmp) { max = tmp; } 1447 } 1448 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1449 } 1450 lrow = row - brstart; 1451 1452 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1453 if (!v) {pvA = 0; pvB = 0;} 1454 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1455 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1456 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1457 nztot = nzA + nzB; 1458 1459 cmap = mat->garray; 1460 if (v || idx) { 1461 if (nztot) { 1462 /* Sort by increasing column numbers, assuming A and B already sorted */ 1463 PetscInt imark = -1; 1464 if (v) { 1465 *v = v_p = mat->rowvalues; 1466 for (i=0; i<nzB; i++) { 1467 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1468 else break; 1469 } 1470 imark = i; 1471 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1472 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1473 } 1474 if (idx) { 1475 *idx = idx_p = mat->rowindices; 1476 if (imark > -1) { 1477 for (i=0; i<imark; i++) { 1478 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1479 } 1480 } else { 1481 for (i=0; i<nzB; i++) { 1482 if (cmap[cworkB[i]/bs] < cstart) 1483 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1484 else break; 1485 } 1486 imark = i; 1487 } 1488 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1489 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1490 } 1491 } else { 1492 if (idx) *idx = 0; 1493 if (v) *v = 0; 1494 } 1495 } 1496 *nz = nztot; 1497 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1505 { 1506 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1507 1508 PetscFunctionBegin; 1509 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1510 baij->getrowactive = PETSC_FALSE; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1517 { 1518 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1523 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1530 { 1531 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1532 Mat A = a->A,B = a->B; 1533 PetscErrorCode ierr; 1534 PetscReal isend[5],irecv[5]; 1535 1536 PetscFunctionBegin; 1537 info->block_size = (PetscReal)matin->rmap->bs; 1538 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1542 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1543 isend[3] += 
info->memory; isend[4] += info->mallocs; 1544 if (flag == MAT_LOCAL) { 1545 info->nz_used = isend[0]; 1546 info->nz_allocated = isend[1]; 1547 info->nz_unneeded = isend[2]; 1548 info->memory = isend[3]; 1549 info->mallocs = isend[4]; 1550 } else if (flag == MAT_GLOBAL_MAX) { 1551 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1552 info->nz_used = irecv[0]; 1553 info->nz_allocated = irecv[1]; 1554 info->nz_unneeded = irecv[2]; 1555 info->memory = irecv[3]; 1556 info->mallocs = irecv[4]; 1557 } else if (flag == MAT_GLOBAL_SUM) { 1558 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1559 info->nz_used = irecv[0]; 1560 info->nz_allocated = irecv[1]; 1561 info->nz_unneeded = irecv[2]; 1562 info->memory = irecv[3]; 1563 info->mallocs = irecv[4]; 1564 } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1565 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1566 info->fill_ratio_needed = 0; 1567 info->factor_mallocs = 0; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1574 { 1575 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 switch (op) { 1580 case MAT_NEW_NONZERO_LOCATIONS: 1581 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1582 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583 case MAT_KEEP_NONZERO_PATTERN: 1584 case MAT_NEW_NONZERO_LOCATION_ERR: 1585 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1586 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1587 break; 1588 case MAT_ROW_ORIENTED: 1589 a->roworiented = flg; 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_NEW_DIAGONALS: 1594 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1595 break; 1596 case MAT_IGNORE_OFF_PROC_ENTRIES: 1597 a->donotstash = flg; 1598 break; 1599 case MAT_USE_HASH_TABLE: 1600 a->ht_flag = flg; 1601 break; 1602 case MAT_SYMMETRIC: 1603 case MAT_STRUCTURALLY_SYMMETRIC: 1604 case MAT_HERMITIAN: 1605 case MAT_SYMMETRY_ETERNAL: 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 break; 1608 default: 1609 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1617 { 1618 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1619 Mat_SeqBAIJ *Aloc; 1620 Mat B; 1621 PetscErrorCode ierr; 1622 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1623 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1624 MatScalar *a; 1625 1626 PetscFunctionBegin; 1627 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1628 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1629 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1630 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1631 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1632 /* Do not know preallocation information, but must set block size */ 1633 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1634 } else { 1635 B = *matout; 1636 } 1637 1638 /* copy over 
the A part */ 1639 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1640 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1641 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1642 1643 for (i=0; i<mbs; i++) { 1644 rvals[0] = bs*(baij->rstartbs + i); 1645 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1646 for (j=ai[i]; j<ai[i+1]; j++) { 1647 col = (baij->cstartbs+aj[j])*bs; 1648 for (k=0; k<bs; k++) { 1649 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1650 col++; a += bs; 1651 } 1652 } 1653 } 1654 /* copy over the B part */ 1655 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1656 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1657 for (i=0; i<mbs; i++) { 1658 rvals[0] = bs*(baij->rstartbs + i); 1659 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 col = baij->garray[aj[j]]*bs; 1662 for (k=0; k<bs; k++) { 1663 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 ierr = PetscFree(rvals);CHKERRQ(ierr); 1669 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 1672 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1673 *matout = B; 1674 } else { 1675 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1682 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1683 { 1684 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1685 Mat a = baij->A,b = baij->B; 1686 PetscErrorCode ierr; 1687 PetscInt s1,s2,s3; 1688 1689 PetscFunctionBegin; 1690 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1691 if (rr) { 1692 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1693 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1694 /* Overlap communication with computation. 
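       The right-scaling entries needed by the off-diagonal block live on other
       processes, so the scatter that fetches them is started here and only
       completed after the purely local (diagonal block) work below. The same
       begin/compute/end idiom applies to any VecScatter; a minimal sketch,
       assuming a scatter context ctx from xglobal to a local ghosted vector
       xlocal and a purely local function LocalWork() (both names illustrative,
       not part of PETSc):

          ierr = VecScatterBegin(ctx,xglobal,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
          ierr = LocalWork();CHKERRQ(ierr);                 no ghost values needed yet
          ierr = VecScatterEnd(ctx,xglobal,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
          ghost entries of xlocal are now valid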
*/ 1695 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 } 1697 if (ll) { 1698 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1699 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1700 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1701 } 1702 /* scale the diagonal block */ 1703 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1704 1705 if (rr) { 1706 /* Do a scatter end and then right scale the off-diagonal block */ 1707 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1715 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1716 { 1717 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1718 PetscErrorCode ierr; 1719 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1720 PetscInt i,*owners = A->rmap->range; 1721 PetscInt *nprocs,j,idx,nsends,row; 1722 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1723 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1724 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1725 MPI_Comm comm = ((PetscObject)A)->comm; 1726 MPI_Request *send_waits,*recv_waits; 1727 MPI_Status recv_status,*send_status; 1728 const PetscScalar *xx; 1729 PetscScalar *bb; 1730 #if defined(PETSC_DEBUG) 1731 PetscBool found = PETSC_FALSE; 1732 #endif 1733 1734 PetscFunctionBegin; 1735 /* first count number of contributors to each processor */ 1736 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1737 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1738 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1739 j = 0; 1740 for (i=0; i<N; i++) { 1741 if (lastidx > (idx = rows[i])) j = 0; 1742 lastidx = idx; 1743 for (; j<size; j++) { 1744 if (idx >= owners[j] && idx < owners[j+1]) { 1745 nprocs[2*j]++; 1746 nprocs[2*j+1] = 1; 1747 owner[i] = j; 1748 #if defined(PETSC_DEBUG) 1749 found = PETSC_TRUE; 1750 #endif 1751 break; 1752 } 1753 } 1754 #if defined(PETSC_DEBUG) 1755 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1756 found = PETSC_FALSE; 1757 #endif 1758 } 1759 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1760 1761 if (A->nooffproczerorows) { 1762 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1763 nrecvs = nsends; 1764 nmax = N; 1765 } else { 1766 /* inform other processors of number of messages and max length*/ 1767 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1768 } 1769 1770 /* post receives: */ 1771 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1772 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1773 for (i=0; i<nrecvs; i++) { 1774 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1775 } 1776 1777 /* do sends: 1778 1) starts[i] gives the starting index in svalues for stuff going to 1779 the ith processor 1780 */ 1781 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1782 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1783 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1784 starts[0] = 0; 1785 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1786 for (i=0; i<N; i++) { 1787 svalues[starts[owner[i]]++] = rows[i]; 1788 } 1789 1790 starts[0] = 0; 1791 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1792 count = 0; 1793 for (i=0; i<size; i++) { 1794 if (nprocs[2*i+1]) { 1795 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1796 } 1797 } 1798 ierr = PetscFree(starts);CHKERRQ(ierr); 1799 1800 base = owners[rank]; 1801 1802 /* wait on receives */ 1803 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1804 count = nrecvs; 1805 slen = 0; 1806 while (count) { 1807 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1808 /* unpack receives into our local space */ 1809 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1810 source[imdex] = recv_status.MPI_SOURCE; 1811 lens[imdex] = n; 1812 slen += n; 1813 count--; 1814 } 1815 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1816 1817 /* move the data into the send scatter */ 1818 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1819 count = 0; 1820 for (i=0; i<nrecvs; i++) { 1821 values = rvalues + i*nmax; 1822 for (j=0; j<lens[i]; j++) { 1823 lrows[count++] = values[j] - base; 1824 } 1825 } 1826 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1827 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1828 ierr = PetscFree(owner);CHKERRQ(ierr); 1829 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1830 1831 /* fix right hand side if needed */ 1832 if (x && b) { 1833 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1834 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1835 for (i=0; i<slen; i++) { 1836 bb[lrows[i]] = diag*xx[lrows[i]]; 1837 } 1838 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1839 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1840 } 1841 1842 /* actually zap the local rows */ 1843 /* 1844 Zero the required rows. If the "diagonal block" of the matrix 1845 is square and the user wishes to set the diagonal we use separate 1846 code so that MatSetValues() is not called for each diagonal allocating 1847 new memory, thus calling lots of mallocs and slowing things down. 
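       All of this message passing is invisible to the caller, who simply calls
       MatZeroRows() with global row indices, possibly including rows owned by
       other processes. A minimal sketch of the typical Dirichlet-boundary use,
       assuming nbc rows listed in bcrows[] and vectors x and b already set up
       (names illustrative, not part of this file):

          ierr = MatZeroRows(A,nbc,bcrows,1.0,x,b);CHKERRQ(ierr);
          each listed row is zeroed, its diagonal entry is set to 1.0, and
          b[row] is reset to 1.0*x[row] so the prescribed values are preserved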
1848 1849 */ 1850 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1851 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1852 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1853 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1854 } else if (diag != 0.0) { 1855 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1856 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1857 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1858 for (i=0; i<slen; i++) { 1859 row = lrows[i] + rstart_bs; 1860 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1861 } 1862 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1863 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1864 } else { 1865 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1866 } 1867 1868 ierr = PetscFree(lrows);CHKERRQ(ierr); 1869 1870 /* wait on sends */ 1871 if (nsends) { 1872 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1873 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1874 ierr = PetscFree(send_status);CHKERRQ(ierr); 1875 } 1876 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1877 ierr = PetscFree(svalues);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1883 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1884 { 1885 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1886 PetscErrorCode ierr; 1887 1888 PetscFunctionBegin; 1889 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatEqual_MPIBAIJ" 1897 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1898 { 1899 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1900 Mat a,b,c,d; 1901 PetscBool flg; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 a = matA->A; b = matA->B; 1906 c = matB->A; d = matB->B; 1907 1908 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1909 if (flg) { 1910 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1911 } 1912 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatCopy_MPIBAIJ" 1918 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1919 { 1920 PetscErrorCode ierr; 1921 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1922 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1923 1924 PetscFunctionBegin; 1925 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
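      Callers do not choose the path explicitly; it follows from the MatStructure
      argument and the matrix types. A minimal sketch, assuming A is an assembled
      MPIBAIJ matrix and C is another assembled matrix of compatible sizes
      (illustrative only):

         ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
         ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       takes the fast per-block path below
         ierr = MatCopy(A,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  falls back to MatCopy_Basic()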
*/ 1926 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1927 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1928 } else { 1929 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1930 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1931 } 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1937 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1938 { 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1948 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1949 { 1950 PetscErrorCode ierr; 1951 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1952 PetscBLASInt bnz,one=1; 1953 Mat_SeqBAIJ *x,*y; 1954 1955 PetscFunctionBegin; 1956 if (str == SAME_NONZERO_PATTERN) { 1957 PetscScalar alpha = a; 1958 x = (Mat_SeqBAIJ *)xx->A->data; 1959 y = (Mat_SeqBAIJ *)yy->A->data; 1960 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1961 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1962 x = (Mat_SeqBAIJ *)xx->B->data; 1963 y = (Mat_SeqBAIJ *)yy->B->data; 1964 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 1965 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1966 } else { 1967 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1968 } 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1974 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1975 { 1976 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1981 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 1987 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1988 { 1989 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1994 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2000 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2001 { 2002 PetscErrorCode ierr; 2003 IS iscol_local; 2004 PetscInt csize; 2005 2006 PetscFunctionBegin; 2007 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2008 if (call == MAT_REUSE_MATRIX) { 2009 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2010 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2011 } else { 2012 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2013 } 2014 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2015 if (call == MAT_INITIAL_MATRIX) { 2016 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2017 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2024 /* 2025 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2026 in local and then by 
concatenating the local matrices the end result. 2027 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2028 */ 2029 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2030 { 2031 PetscErrorCode ierr; 2032 PetscMPIInt rank,size; 2033 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2034 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2035 Mat M,Mreuse; 2036 MatScalar *vwork,*aa; 2037 MPI_Comm comm = ((PetscObject)mat)->comm; 2038 IS isrow_new, iscol_new; 2039 PetscBool idflag,allrows, allcols; 2040 Mat_SeqBAIJ *aij; 2041 2042 PetscFunctionBegin; 2043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2044 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2045 /* The compression and expansion should be avoided. Doesn't point 2046 out errors, might change the indices, hence buggey */ 2047 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2048 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2049 2050 /* Check for special case: each processor gets entire matrix columns */ 2051 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2052 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2053 if (idflag && ncol == mat->cmap->N) { 2054 allcols = PETSC_TRUE; 2055 } else { 2056 allcols = PETSC_FALSE; 2057 } 2058 2059 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2060 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2061 if (idflag && nrow == mat->rmap->N) { 2062 allrows = PETSC_TRUE; 2063 } else { 2064 allrows = PETSC_FALSE; 2065 } 2066 if (call == MAT_REUSE_MATRIX) { 2067 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2068 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2069 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2070 } else { 2071 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2072 } 2073 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2074 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2075 /* 2076 m - number of local rows 2077 n - number of columns (same on all processors) 2078 rstart - first row in new global matrix generated 2079 */ 2080 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2081 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2082 m = m/bs; 2083 n = n/bs; 2084 2085 if (call == MAT_INITIAL_MATRIX) { 2086 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2087 ii = aij->i; 2088 jj = aij->j; 2089 2090 /* 2091 Determine the number of non-zeros in the diagonal and off-diagonal 2092 portions of the matrix in order to do correct preallocation 2093 */ 2094 2095 /* first get start and end of "diagonal" columns */ 2096 if (csize == PETSC_DECIDE) { 2097 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2098 if (mglobal == n*bs) { /* square matrix */ 2099 nlocal = m; 2100 } else { 2101 nlocal = n/size + ((n % size) > rank); 2102 } 2103 } else { 2104 nlocal = csize/bs; 2105 } 2106 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2107 rstart = rend - nlocal; 2108 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2109 2110 /* next, compute all the lengths */ 2111 ierr = 
PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2112 olens = dlens + m; 2113 for (i=0; i<m; i++) { 2114 jend = ii[i+1] - ii[i]; 2115 olen = 0; 2116 dlen = 0; 2117 for (j=0; j<jend; j++) { 2118 if (*jj < rstart || *jj >= rend) olen++; 2119 else dlen++; 2120 jj++; 2121 } 2122 olens[i] = olen; 2123 dlens[i] = dlen; 2124 } 2125 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2126 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2127 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2128 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2129 ierr = PetscFree(dlens);CHKERRQ(ierr); 2130 } else { 2131 PetscInt ml,nl; 2132 2133 M = *newmat; 2134 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2135 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2136 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2137 /* 2138 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2139 rather than the slower MatSetValues(). 2140 */ 2141 M->was_assembled = PETSC_TRUE; 2142 M->assembled = PETSC_FALSE; 2143 } 2144 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2145 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2146 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2147 ii = aij->i; 2148 jj = aij->j; 2149 aa = aij->a; 2150 for (i=0; i<m; i++) { 2151 row = rstart/bs + i; 2152 nz = ii[i+1] - ii[i]; 2153 cwork = jj; jj += nz; 2154 vwork = aa; aa += nz*bs*bs; 2155 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2156 } 2157 2158 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 *newmat = M; 2161 2162 /* save submatrix used in processor for next request */ 2163 if (call == MAT_INITIAL_MATRIX) { 2164 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2165 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "MatPermute_MPIBAIJ" 2172 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2173 { 2174 MPI_Comm comm,pcomm; 2175 PetscInt first,rlocal_size,clocal_size,nrows; 2176 const PetscInt *rows; 2177 PetscMPIInt size; 2178 IS crowp,growp,irowp,lrowp,lcolp; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2183 /* make a collective version of 'rowp' */ 2184 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2185 if (pcomm==comm) { 2186 crowp = rowp; 2187 } else { 2188 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2189 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2190 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2191 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2192 } 2193 /* collect the global row permutation and invert it */ 2194 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2195 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2196 if (pcomm!=comm) { 2197 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2198 } 2199 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2200 ierr = ISDestroy(&growp);CHKERRQ(ierr); 2201 /* get the local target indices */ 2202 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2203 ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr); 2204 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2205 ierr = 
ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) { *nghosts = B->nbs;}
  if (ghosts)  { *ghosts  = baij->garray;}
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2262 2263 PetscFunctionBegin; 2264 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2265 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2266 2267 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2268 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2269 M = mat->rmap->n/bs; 2270 cstart = mat->cmap->rstart/bs; 2271 cend = mat->cmap->rend/bs; 2272 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2273 c->N = mat->cmap->N/bs; 2274 c->m = mat->rmap->n/bs; 2275 c->rstart = mat->rmap->rstart/bs; 2276 2277 c->ncolors = nis; 2278 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2279 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2280 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2281 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2282 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2283 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2284 2285 /* Allow access to data structures of local part of matrix */ 2286 if (!baij->colmap) { 2287 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2288 } 2289 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2290 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2291 2292 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2293 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2294 2295 for (i=0; i<nis; i++) { 2296 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2297 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2298 c->ncolumns[i] = n; 2299 if (n) { 2300 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2301 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2302 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2303 } else { 2304 c->columns[i] = 0; 2305 } 2306 2307 if (ctype == IS_COLORING_GLOBAL) { 2308 /* Determine the total (parallel) number of columns of this color */ 2309 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2310 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2311 2312 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 2313 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2314 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2315 if (!nctot) { 2316 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2317 } 2318 2319 disp[0] = 0; 2320 for (j=1; j<size; j++) { 2321 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2322 } 2323 2324 /* Get complete list of columns for color on each processor */ 2325 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2326 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2327 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2328 } else if (ctype == IS_COLORING_GHOSTED) { 2329 /* Determine local number of columns of this color on this process, including ghost points */ 2330 nctot = n; 2331 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2332 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2333 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2334 2335 /* 2336 Mark all rows affect by these columns 2337 */ 2338 /* Temporary option to allow for debugging/testing */ 2339 flg = PETSC_FALSE; 2340 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2341 if (!flg) {/*-----------------------------------------------------------------------------*/ 2342 /* crude, fast version */ 2343 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2344 /* loop over columns*/ 2345 for (j=0; j<nctot; j++) { 2346 if (ctype == IS_COLORING_GHOSTED) { 2347 col = ltog[cols[j]]; 2348 } else { 2349 col = cols[j]; 2350 } 2351 if (col >= cstart && col < cend) { 2352 /* column is in diagonal block of matrix */ 2353 rows = A_cj + A_ci[col-cstart]; 2354 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2355 } else { 2356 #if defined (PETSC_USE_CTABLE) 2357 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2358 colb --; 2359 #else 2360 colb = baij->colmap[col] - 1; 2361 #endif 2362 if (colb == -1) { 2363 m = 0; 2364 } else { 2365 colb = colb/bs; 2366 rows = B_cj + B_ci[colb]; 2367 m = B_ci[colb+1] - B_ci[colb]; 2368 } 2369 } 2370 /* loop over columns marking them in rowhit */ 2371 for (k=0; k<m; k++) { 2372 rowhit[*rows++] = col + 1; 2373 } 2374 } 2375 2376 /* count the number of hits */ 2377 nrows = 0; 2378 for (j=0; j<M; j++) { 2379 if (rowhit[j]) nrows++; 2380 } 2381 c->nrows[i] = nrows; 2382 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2383 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2384 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2385 nrows = 0; 2386 for (j=0; j<M; j++) { 2387 if (rowhit[j]) { 2388 c->rows[i][nrows] = j; 2389 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2390 nrows++; 2391 } 2392 } 2393 } else {/*-------------------------------------------------------------------------------*/ 2394 /* slow version, using rowhit as a linked list */ 2395 PetscInt currentcol,fm,mfm; 2396 rowhit[M] = M; 2397 nrows = 0; 2398 /* loop over columns*/ 2399 for (j=0; j<nctot; j++) { 2400 if (ctype == IS_COLORING_GHOSTED) { 2401 col = ltog[cols[j]]; 2402 } else { 2403 col = cols[j]; 2404 } 2405 if (col >= cstart && col < cend) { 2406 /* column is in diagonal block of matrix */ 2407 rows = A_cj + A_ci[col-cstart]; 2408 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2409 } else { 2410 #if defined (PETSC_USE_CTABLE) 2411 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2412 colb --; 2413 #else 2414 colb = baij->colmap[col] - 1; 2415 #endif 2416 if (colb == -1) { 2417 m = 0; 2418 } else { 2419 colb = colb/bs; 2420 rows = B_cj + B_ci[colb]; 2421 m = B_ci[colb+1] - B_ci[colb]; 2422 } 2423 } 2424 2425 /* loop over columns marking them in rowhit */ 2426 fm = M; /* fm points to first entry in linked list */ 2427 for (k=0; k<m; k++) { 2428 currentcol = *rows++; 2429 /* is it already in the list? 
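             rowhit[] doubles here as a sorted singly linked list threaded through
             the block-row indices: rowhit[M] holds the smallest row hit so far and
             rowhit[r] holds the next larger row after r, with M itself acting as
             the end-of-list sentinel. For example, with M = 10 block rows, after
             rows 3 and 7 have been hit the array satisfies rowhit[10] = 3,
             rowhit[3] = 7, rowhit[7] = 10 (a worked example of the invariant
             maintained by the insertion below, not additional code).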
*/
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column is in "off-processor" part */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart;
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N, *gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1;
    for (i=0; i<map->n; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];        /* global column index */
        c->vscaleforrow[k][l] = gtol[col];   /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr =
MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2522 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2523 CHKMEMQ; 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2529 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2530 { 2531 Mat B; 2532 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2533 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2534 Mat_SeqAIJ *b; 2535 PetscErrorCode ierr; 2536 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2537 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2538 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2539 2540 PetscFunctionBegin; 2541 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2542 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2543 2544 /* ---------------------------------------------------------------- 2545 Tell every processor the number of nonzeros per row 2546 */ 2547 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2548 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2549 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2550 } 2551 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2552 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2553 displs = recvcounts + size; 2554 for (i=0; i<size; i++) { 2555 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2556 displs[i] = A->rmap->range[i]/bs; 2557 } 2558 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2559 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2560 #else 2561 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2562 #endif 2563 /* --------------------------------------------------------------- 2564 Create the sequential matrix of the same type as the local block diagonal 2565 */ 2566 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2567 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2568 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2569 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2570 b = (Mat_SeqAIJ *)B->data; 2571 2572 /*-------------------------------------------------------------------- 2573 Copy my part of matrix column indices over 2574 */ 2575 sendcount = ad->nz + bd->nz; 2576 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2577 a_jsendbuf = ad->j; 2578 b_jsendbuf = bd->j; 2579 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 cnt = 0; 2581 for (i=0; i<n; i++) { 2582 2583 /* put in lower diagonal portion */ 2584 m = bd->i[i+1] - bd->i[i]; 2585 while (m > 0) { 2586 /* is it above diagonal (in bd (compressed) numbering) */ 2587 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2588 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2589 m--; 2590 } 2591 2592 /* put in diagonal portion */ 2593 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2594 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2595 } 2596 2597 /* put in upper diagonal portion */ 2598 while (m-- > 0) { 2599 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2600 } 2601 } 2602 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual 
nz %D",sendcount,cnt); 2603 2604 /*-------------------------------------------------------------------- 2605 Gather all column indices to all processors 2606 */ 2607 for (i=0; i<size; i++) { 2608 recvcounts[i] = 0; 2609 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2610 recvcounts[i] += lens[j]; 2611 } 2612 } 2613 displs[0] = 0; 2614 for (i=1; i<size; i++) { 2615 displs[i] = displs[i-1] + recvcounts[i-1]; 2616 } 2617 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2618 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2619 #else 2620 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2621 #endif 2622 /*-------------------------------------------------------------------- 2623 Assemble the matrix into useable form (note numerical values not yet set) 2624 */ 2625 /* set the b->ilen (length of each row) values */ 2626 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2627 /* set the b->i indices */ 2628 b->i[0] = 0; 2629 for (i=1; i<=A->rmap->N/bs; i++) { 2630 b->i[i] = b->i[i-1] + lens[i-1]; 2631 } 2632 ierr = PetscFree(lens);CHKERRQ(ierr); 2633 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2634 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2635 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2636 2637 if (A->symmetric) { 2638 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2639 } else if (A->hermitian) { 2640 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2641 } else if (A->structurally_symmetric) { 2642 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2643 } 2644 *newmat = B; 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #undef __FUNCT__ 2649 #define __FUNCT__ "MatSOR_MPIBAIJ" 2650 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2651 { 2652 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2653 PetscErrorCode ierr; 2654 Vec bb1 = 0; 2655 2656 PetscFunctionBegin; 2657 if (flag == SOR_APPLY_UPPER) { 2658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2663 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2664 } 2665 2666 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2667 if (flag & SOR_ZERO_INITIAL_GUESS) { 2668 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2669 its--; 2670 } 2671 2672 while (its--) { 2673 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2674 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2675 2676 /* update rhs: bb1 = bb - B*x */ 2677 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2678 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2679 2680 /* local sweep */ 2681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2682 } 2683 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2684 if (flag & SOR_ZERO_INITIAL_GUESS) { 2685 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2686 its--; 2687 } 2688 while (its--) { 2689 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2690 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2691 2692 /* update rhs: bb1 = bb - B*x */ 2693 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2694 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2695 2696 /* local sweep */ 2697 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2698 } 2699 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2700 if (flag & SOR_ZERO_INITIAL_GUESS) { 2701 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2702 its--; 2703 } 2704 while (its--) { 2705 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2707 2708 /* update rhs: bb1 = bb - B*x */ 2709 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2710 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2711 2712 /* local sweep */ 2713 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2714 } 2715 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2716 2717 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2718 PetscFunctionReturn(0); 2719 } 2720 2721 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2725 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2726 { 2727 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 2736 /* -------------------------------------------------------------------*/ 2737 static struct _MatOps MatOps_Values = { 2738 MatSetValues_MPIBAIJ, 2739 MatGetRow_MPIBAIJ, 2740 MatRestoreRow_MPIBAIJ, 2741 MatMult_MPIBAIJ, 2742 /* 4*/ MatMultAdd_MPIBAIJ, 2743 MatMultTranspose_MPIBAIJ, 2744 MatMultTransposeAdd_MPIBAIJ, 2745 0, 2746 0, 2747 0, 2748 /*10*/ 0, 2749 0, 2750 0, 2751 MatSOR_MPIBAIJ, 2752 MatTranspose_MPIBAIJ, 2753 /*15*/ MatGetInfo_MPIBAIJ, 2754 MatEqual_MPIBAIJ, 2755 MatGetDiagonal_MPIBAIJ, 2756 MatDiagonalScale_MPIBAIJ, 2757 MatNorm_MPIBAIJ, 2758 /*20*/ MatAssemblyBegin_MPIBAIJ, 2759 MatAssemblyEnd_MPIBAIJ, 2760 MatSetOption_MPIBAIJ, 2761 MatZeroEntries_MPIBAIJ, 2762 /*24*/ MatZeroRows_MPIBAIJ, 2763 0, 2764 0, 2765 0, 2766 0, 2767 /*29*/ MatSetUp_MPIBAIJ, 2768 0, 2769 0, 2770 0, 2771 0, 2772 /*34*/ MatDuplicate_MPIBAIJ, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /*39*/ MatAXPY_MPIBAIJ, 2778 MatGetSubMatrices_MPIBAIJ, 2779 MatIncreaseOverlap_MPIBAIJ, 2780 MatGetValues_MPIBAIJ, 2781 MatCopy_MPIBAIJ, 2782 /*44*/ 0, 2783 MatScale_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 /*49*/ 0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*54*/ MatFDColoringCreate_MPIBAIJ, 2793 0, 2794 MatSetUnfactored_MPIBAIJ, 2795 MatPermute_MPIBAIJ, 2796 MatSetValuesBlocked_MPIBAIJ, 2797 /*59*/ MatGetSubMatrix_MPIBAIJ, 2798 MatDestroy_MPIBAIJ, 2799 MatView_MPIBAIJ, 2800 0, 2801 0, 2802 /*64*/ 0, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2808 0, 2809 0, 2810 0, 2811 0, 2812 /*74*/ 0, 2813 MatFDColoringApply_BAIJ, 2814 0, 2815 0, 2816 0, 2817 /*79*/ 0, 2818 0, 2819 0, 2820 0, 2821 MatLoad_MPIBAIJ, 2822 /*84*/ 0, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*89*/ 0, 2828 0, 2829 0, 2830 0, 2831 0, 2832 /*94*/ 0, 2833 0, 2834 0, 2835 0, 2836 0, 2837 
/*99*/ 0, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*104*/0, 2843 MatRealPart_MPIBAIJ, 2844 MatImaginaryPart_MPIBAIJ, 2845 0, 2846 0, 2847 /*109*/0, 2848 0, 2849 0, 2850 0, 2851 0, 2852 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2853 0, 2854 MatGetGhosts_MPIBAIJ, 2855 0, 2856 0, 2857 /*119*/0, 2858 0, 2859 0, 2860 0, 2861 0, 2862 /*124*/0, 2863 0, 2864 MatInvertBlockDiagonal_MPIBAIJ 2865 }; 2866 2867 EXTERN_C_BEGIN 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2870 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2871 { 2872 PetscFunctionBegin; 2873 *a = ((Mat_MPIBAIJ *)A->data)->A; 2874 PetscFunctionReturn(0); 2875 } 2876 EXTERN_C_END 2877 2878 EXTERN_C_BEGIN 2879 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2880 EXTERN_C_END 2881 2882 EXTERN_C_BEGIN 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2885 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2886 { 2887 PetscInt m,rstart,cstart,cend; 2888 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2889 const PetscInt *JJ=0; 2890 PetscScalar *values=0; 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2896 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2897 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2898 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2899 m = B->rmap->n/bs; 2900 rstart = B->rmap->rstart/bs; 2901 cstart = B->cmap->rstart/bs; 2902 cend = B->cmap->rend/bs; 2903 2904 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2905 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2906 for (i=0; i<m; i++) { 2907 nz = ii[i+1] - ii[i]; 2908 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2909 nz_max = PetscMax(nz_max,nz); 2910 JJ = jj + ii[i]; 2911 for (j=0; j<nz; j++) { 2912 if (*JJ >= cstart) break; 2913 JJ++; 2914 } 2915 d = 0; 2916 for (; j<nz; j++) { 2917 if (*JJ++ >= cend) break; 2918 d++; 2919 } 2920 d_nnz[i] = d; 2921 o_nnz[i] = nz - d; 2922 } 2923 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2924 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2925 2926 values = (PetscScalar*)V; 2927 if (!values) { 2928 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2929 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2930 } 2931 for (i=0; i<m; i++) { 2932 PetscInt row = i + rstart; 2933 PetscInt ncols = ii[i+1] - ii[i]; 2934 const PetscInt *icols = jj + ii[i]; 2935 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2936 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2937 } 2938 2939 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2940 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2941 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2942 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2943 PetscFunctionReturn(0); 2944 } 2945 EXTERN_C_END 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2949 /*@C 2950 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2951 (the default parallel PETSc format). 
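   The i[] and j[] arrays are in terms of blocks, not scalar entries: i[k+1]-i[k] is the
   number of nonzero blocks in local block row k and j[] lists their block column indices,
   while v[] supplies bs*bs values per block in the order expected by MatSetValuesBlocked().
   A minimal sketch for a process owning two block rows with bs = 2 (the arrays shown are
   illustrative, not taken from any particular application):

.vb
      PetscInt    ii[] = {0,2,3};    two blocks in block row 0, one block in block row 1
      PetscInt    jj[] = {0,1,1};    sorted block column indices for each block row
      PetscScalar vv[3*4];           3 blocks times bs*bs = 4 scalars each
      ierr = MatMPIBAIJSetPreallocationCSR(A,2,ii,jj,vv);CHKERRQ(ierr);
.ve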
2952 2953 Collective on MPI_Comm 2954 2955 Input Parameters: 2956 + A - the matrix 2957 . bs - the block size 2958 . i - the indices into j for the start of each local row (starts with zero) 2959 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2960 - v - optional values in the matrix 2961 2962 Level: developer 2963 2964 .keywords: matrix, aij, compressed row, sparse, parallel 2965 2966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2967 @*/ 2968 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2969 { 2970 PetscErrorCode ierr; 2971 2972 PetscFunctionBegin; 2973 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2974 PetscValidType(B,1); 2975 PetscValidLogicalCollectiveInt(B,bs,2); 2976 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 EXTERN_C_BEGIN 2981 #undef __FUNCT__ 2982 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2983 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2984 { 2985 Mat_MPIBAIJ *b; 2986 PetscErrorCode ierr; 2987 PetscInt i; 2988 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 2989 2990 PetscFunctionBegin; 2991 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 2992 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 2993 2994 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2995 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2996 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 2997 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 2998 2999 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3000 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3001 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3002 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3003 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3004 3005 if (d_nnz) { 3006 for (i=0; i<B->rmap->n/bs; i++) { 3007 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3008 } 3009 } 3010 if (o_nnz) { 3011 for (i=0; i<B->rmap->n/bs; i++) { 3012 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3013 } 3014 } 3015 3016 b = (Mat_MPIBAIJ*)B->data; 3017 b->bs2 = bs*bs; 3018 b->mbs = B->rmap->n/bs; 3019 b->nbs = B->cmap->n/bs; 3020 b->Mbs = B->rmap->N/bs; 3021 b->Nbs = B->cmap->N/bs; 3022 3023 for (i=0; i<=b->size; i++) { 3024 b->rangebs[i] = B->rmap->range[i]/bs; 3025 } 3026 b->rstartbs = B->rmap->rstart/bs; 3027 b->rendbs = B->rmap->rend/bs; 3028 b->cstartbs = B->cmap->rstart/bs; 3029 b->cendbs = B->cmap->rend/bs; 3030 3031 if (!B->preallocated) { 3032 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3033 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3034 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3035 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3036 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3037 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3038 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3039 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3040 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3041 } 3042 3043 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3044 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3045 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3046 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3047 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3048 B->preallocated = PETSC_TRUE; 3049 PetscFunctionReturn(0); 3050 } 3051 EXTERN_C_END 3052 3053 EXTERN_C_BEGIN 3054 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3055 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3056 EXTERN_C_END 3057 3058 3059 EXTERN_C_BEGIN 3060 #undef __FUNCT__ 3061 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3062 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3063 { 3064 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3065 PetscErrorCode ierr; 3066 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3067 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3068 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3069 3070 PetscFunctionBegin; 3071 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3072 ii[0] = 0; 3073 CHKMEMQ; 3074 for (i=0; i<M; i++) { 3075 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3076 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3077 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3078 /* remove one from count of matrix has diagonal */ 3079 for (j=id[i]; j<id[i+1]; j++) { 3080 if (jd[j] == i) {ii[i+1]--;break;} 3081 } 3082 CHKMEMQ; 3083 } 3084 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3085 cnt = 0; 3086 for (i=0; i<M; i++) { 3087 for (j=io[i]; j<io[i+1]; j++) { 3088 if (garray[jo[j]] > rstart) break; 3089 jj[cnt++] = garray[jo[j]]; 3090 CHKMEMQ; 3091 } 3092 for (k=id[i]; k<id[i+1]; k++) { 3093 if (jd[k] != i) { 3094 jj[cnt++] = rstart + jd[k]; 3095 CHKMEMQ; 3096 } 3097 } 3098 for (;j<io[i+1]; j++) { 3099 jj[cnt++] = garray[jo[j]]; 3100 CHKMEMQ; 3101 } 3102 } 3103 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3104 PetscFunctionReturn(0); 3105 } 3106 EXTERN_C_END 3107 3108 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3109 EXTERN_C_BEGIN 3110 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3111 EXTERN_C_END 3112 3113 EXTERN_C_BEGIN 3114 #undef __FUNCT__ 3115 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3116 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3117 { 3118 PetscErrorCode ierr; 3119 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3120 Mat B; 3121 Mat_MPIAIJ *b; 3122 3123 PetscFunctionBegin; 3124 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3125 3126 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3127 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3128 ierr = 
MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3129 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3130 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3131 b = (Mat_MPIAIJ*) B->data; 3132 3133 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3134 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3135 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3136 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3137 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3138 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 if (reuse == MAT_REUSE_MATRIX) { 3143 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3144 } else { 3145 *newmat = B; 3146 } 3147 PetscFunctionReturn(0); 3148 } 3149 EXTERN_C_END 3150 3151 EXTERN_C_BEGIN 3152 #if defined(PETSC_HAVE_MUMPS) 3153 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3154 #endif 3155 EXTERN_C_END 3156 3157 /*MC 3158 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3159 3160 Options Database Keys: 3161 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3162 . -mat_block_size <bs> - set the blocksize used to store the matrix 3163 - -mat_use_hash_table <fact> 3164 3165 Level: beginner 3166 3167 .seealso: MatCreateMPIBAIJ 3168 M*/ 3169 3170 EXTERN_C_BEGIN 3171 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3172 EXTERN_C_END 3173 3174 EXTERN_C_BEGIN 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatCreate_MPIBAIJ" 3177 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3178 { 3179 Mat_MPIBAIJ *b; 3180 PetscErrorCode ierr; 3181 PetscBool flg; 3182 3183 PetscFunctionBegin; 3184 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3185 B->data = (void*)b; 3186 3187 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3188 B->assembled = PETSC_FALSE; 3189 3190 B->insertmode = NOT_SET_VALUES; 3191 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3192 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3193 3194 /* build local table of row and column ownerships */ 3195 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3196 3197 /* build cache for off array entries formed */ 3198 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3199 b->donotstash = PETSC_FALSE; 3200 b->colmap = PETSC_NULL; 3201 b->garray = PETSC_NULL; 3202 b->roworiented = PETSC_TRUE; 3203 3204 /* stuff used in block assembly */ 3205 b->barray = 0; 3206 3207 /* stuff used for matrix vector multiply */ 3208 b->lvec = 0; 3209 b->Mvctx = 0; 3210 3211 /* stuff for MatGetRow() */ 3212 b->rowindices = 0; 3213 b->rowvalues = 0; 3214 b->getrowactive = PETSC_FALSE; 3215 3216 /* hash table stuff */ 3217 b->ht = 0; 3218 b->hd = 0; 3219 b->ht_size = 0; 3220 b->ht_flag = PETSC_FALSE; 3221 b->ht_fact = 0; 3222 b->ht_total_ct = 0; 3223 b->ht_insert_ct = 0; 3224 3225 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3226 b->ijonly = PETSC_FALSE; 3227 3228 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3229 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in 
constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3230 if (flg) { 3231 PetscReal fact = 1.39; 3232 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3233 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3234 if (fact <= 1.0) fact = 1.39; 3235 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3236 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3237 } 3238 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3239 3240 #if defined(PETSC_HAVE_MUMPS) 3241 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3242 #endif 3243 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3244 "MatConvert_MPIBAIJ_MPIAdj", 3245 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3246 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3247 "MatConvert_MPIBAIJ_MPIAIJ", 3248 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3249 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3250 "MatConvert_MPIBAIJ_MPISBAIJ", 3251 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3252 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3253 "MatStoreValues_MPIBAIJ", 3254 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3256 "MatRetrieveValues_MPIBAIJ", 3257 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3259 "MatGetDiagonalBlock_MPIBAIJ", 3260 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3262 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3263 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3265 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3266 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3268 "MatDiagonalScaleLocal_MPIBAIJ", 3269 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3271 "MatSetHashTableFactor_MPIBAIJ", 3272 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3274 "MatConvert_MPIBAIJ_MPIBSTRM", 3275 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3276 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3277 PetscFunctionReturn(0); 3278 } 3279 EXTERN_C_END 3280 3281 /*MC 3282 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3283 3284 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3285 and MATMPIBAIJ otherwise. 3286 3287 Options Database Keys: 3288 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row), or PETSC_NULL.  If you plan to factor the matrix you must leave room for the
           diagonal entry and set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   If an *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use hash tables during matrix assembly (the value sets the hash table fill factor)

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
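   For example, assuming an application that already has a communicator comm, a block size bs,
   local sizes m and n, and global sizes M and N (all placeholder names for this illustration),
   and whose block rows contain at most 5 nonzero blocks in the diagonal portion and at most 2
   in the off-diagonal portion, one might preallocate with

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve

   The counts 5 and 2 above are only illustrative placeholders; they should come from the
   application's knowledge of its own sparsity pattern.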
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, inspect the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row), or PETSC_NULL.  If you plan to factor the matrix you must leave room for the
           diagonal entry and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use hash tables during matrix assembly (the value sets the hash table fill factor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]

   Notes:
   If an *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
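   For example, assuming a process that owns three block rows and knows how many nonzero blocks
   each block row needs (the counts below are only placeholders for this illustration), the
   matrix could be created directly with

.vb
   PetscInt d_nnz[3] = {2,1,1};
   PetscInt o_nnz[3] = {1,0,2};

   ierr = MatCreateBAIJ(comm,bs,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE,
                        0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   Because d_nnz and o_nnz are supplied, the d_nz and o_nz arguments (passed here as 0) are ignored.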
3481 3482 Level: intermediate 3483 3484 .keywords: matrix, block, aij, compressed row, sparse, parallel 3485 3486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3487 @*/ 3488 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3489 { 3490 PetscErrorCode ierr; 3491 PetscMPIInt size; 3492 3493 PetscFunctionBegin; 3494 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3495 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3496 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3497 if (size > 1) { 3498 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3499 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3500 } else { 3501 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3502 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3503 } 3504 PetscFunctionReturn(0); 3505 } 3506 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3509 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3510 { 3511 Mat mat; 3512 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3513 PetscErrorCode ierr; 3514 PetscInt len=0; 3515 3516 PetscFunctionBegin; 3517 *newmat = 0; 3518 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3519 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3520 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3521 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3522 3523 mat->factortype = matin->factortype; 3524 mat->preallocated = PETSC_TRUE; 3525 mat->assembled = PETSC_TRUE; 3526 mat->insertmode = NOT_SET_VALUES; 3527 3528 a = (Mat_MPIBAIJ*)mat->data; 3529 mat->rmap->bs = matin->rmap->bs; 3530 a->bs2 = oldmat->bs2; 3531 a->mbs = oldmat->mbs; 3532 a->nbs = oldmat->nbs; 3533 a->Mbs = oldmat->Mbs; 3534 a->Nbs = oldmat->Nbs; 3535 3536 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3537 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3538 3539 a->size = oldmat->size; 3540 a->rank = oldmat->rank; 3541 a->donotstash = oldmat->donotstash; 3542 a->roworiented = oldmat->roworiented; 3543 a->rowindices = 0; 3544 a->rowvalues = 0; 3545 a->getrowactive = PETSC_FALSE; 3546 a->barray = 0; 3547 a->rstartbs = oldmat->rstartbs; 3548 a->rendbs = oldmat->rendbs; 3549 a->cstartbs = oldmat->cstartbs; 3550 a->cendbs = oldmat->cendbs; 3551 3552 /* hash table stuff */ 3553 a->ht = 0; 3554 a->hd = 0; 3555 a->ht_size = 0; 3556 a->ht_flag = oldmat->ht_flag; 3557 a->ht_fact = oldmat->ht_fact; 3558 a->ht_total_ct = 0; 3559 a->ht_insert_ct = 0; 3560 3561 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3562 if (oldmat->colmap) { 3563 #if defined (PETSC_USE_CTABLE) 3564 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3565 #else 3566 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3567 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3568 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3569 #endif 3570 } else a->colmap = 0; 3571 3572 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3573 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3574 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3575 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3576 } else a->garray = 0; 3577 3578 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3579 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3580 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3581 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3582 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3583 3584 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3585 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3586 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3587 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3588 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3589 *newmat = mat; 3590 PetscFunctionReturn(0); 3591 } 3592 3593 #undef __FUNCT__ 3594 #define __FUNCT__ "MatLoad_MPIBAIJ" 3595 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3596 { 3597 PetscErrorCode ierr; 3598 int fd; 3599 PetscInt i,nz,j,rstart,rend; 3600 PetscScalar *vals,*buf; 3601 MPI_Comm comm = ((PetscObject)viewer)->comm; 3602 MPI_Status status; 3603 PetscMPIInt rank,size,maxnz; 3604 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3605 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3606 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3607 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3608 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3609 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3610 3611 PetscFunctionBegin; 3612 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3613 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3614 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3615 3616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3617 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3618 if (!rank) { 3619 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3620 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3621 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3622 } 3623 3624 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3625 3626 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3627 M = header[1]; N = header[2]; 3628 3629 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3630 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3631 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3632 3633 /* If global sizes are set, check if they are consistent with that given in the file */ 3634 if (sizesset) { 3635 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3636 } 3637 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3638 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3639 3640 if (M != N) 
SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3641 3642 /* 3643 This code adds extra rows to make sure the number of rows is 3644 divisible by the blocksize 3645 */ 3646 Mbs = M/bs; 3647 extra_rows = bs - M + bs*Mbs; 3648 if (extra_rows == bs) extra_rows = 0; 3649 else Mbs++; 3650 if (extra_rows && !rank) { 3651 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3652 } 3653 3654 /* determine ownership of all rows */ 3655 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3656 mbs = Mbs/size + ((Mbs % size) > rank); 3657 m = mbs*bs; 3658 } else { /* User set */ 3659 m = newmat->rmap->n; 3660 mbs = m/bs; 3661 } 3662 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3663 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3664 3665 /* process 0 needs enough room for process with most rows */ 3666 if (!rank) { 3667 mmax = rowners[1]; 3668 for (i=2; i<=size; i++) { 3669 mmax = PetscMax(mmax,rowners[i]); 3670 } 3671 mmax*=bs; 3672 } else mmax = m; 3673 3674 rowners[0] = 0; 3675 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3676 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3677 rstart = rowners[rank]; 3678 rend = rowners[rank+1]; 3679 3680 /* distribute row lengths to all processors */ 3681 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3682 if (!rank) { 3683 mend = m; 3684 if (size == 1) mend = mend - extra_rows; 3685 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3686 for (j=mend; j<m; j++) locrowlens[j] = 1; 3687 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3688 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3689 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3690 for (j=0; j<m; j++) { 3691 procsnz[0] += locrowlens[j]; 3692 } 3693 for (i=1; i<size; i++) { 3694 mend = browners[i+1] - browners[i]; 3695 if (i == size-1) mend = mend - extra_rows; 3696 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3697 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3698 /* calculate the number of nonzeros on each processor */ 3699 for (j=0; j<browners[i+1]-browners[i]; j++) { 3700 procsnz[i] += rowlengths[j]; 3701 } 3702 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3703 } 3704 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3705 } else { 3706 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3707 } 3708 3709 if (!rank) { 3710 /* determine max buffer needed and allocate it */ 3711 maxnz = procsnz[0]; 3712 for (i=1; i<size; i++) { 3713 maxnz = PetscMax(maxnz,procsnz[i]); 3714 } 3715 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3716 3717 /* read in my part of the matrix column indices */ 3718 nz = procsnz[0]; 3719 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3720 mycols = ibuf; 3721 if (size == 1) nz -= extra_rows; 3722 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3723 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3724 3725 /* read in every ones (except the last) and ship off */ 3726 for (i=1; i<size-1; i++) { 3727 nz = procsnz[i]; 3728 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3729 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3730 } 3731 /* read in the stuff for the last proc */ 3732 if (size != 1) { 3733 nz = procsnz[size-1] - extra_rows; /* the extra rows 
are not on the disk */ 3734 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3735 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3736 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3737 } 3738 ierr = PetscFree(cols);CHKERRQ(ierr); 3739 } else { 3740 /* determine buffer space needed for message */ 3741 nz = 0; 3742 for (i=0; i<m; i++) { 3743 nz += locrowlens[i]; 3744 } 3745 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3746 mycols = ibuf; 3747 /* receive message of column indices*/ 3748 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3749 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3750 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3751 } 3752 3753 /* loop over local rows, determining number of off diagonal entries */ 3754 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3755 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3756 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3757 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3758 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3759 rowcount = 0; nzcount = 0; 3760 for (i=0; i<mbs; i++) { 3761 dcount = 0; 3762 odcount = 0; 3763 for (j=0; j<bs; j++) { 3764 kmax = locrowlens[rowcount]; 3765 for (k=0; k<kmax; k++) { 3766 tmp = mycols[nzcount++]/bs; 3767 if (!mask[tmp]) { 3768 mask[tmp] = 1; 3769 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3770 else masked1[dcount++] = tmp; 3771 } 3772 } 3773 rowcount++; 3774 } 3775 3776 dlens[i] = dcount; 3777 odlens[i] = odcount; 3778 3779 /* zero out the mask elements we set */ 3780 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3781 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3782 } 3783 3784 3785 if (!sizesset) { 3786 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3787 } 3788 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3789 3790 if (!rank) { 3791 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3792 /* read in my part of the matrix numerical values */ 3793 nz = procsnz[0]; 3794 vals = buf; 3795 mycols = ibuf; 3796 if (size == 1) nz -= extra_rows; 3797 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3798 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3799 3800 /* insert into matrix */ 3801 jj = rstart*bs; 3802 for (i=0; i<m; i++) { 3803 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3804 mycols += locrowlens[i]; 3805 vals += locrowlens[i]; 3806 jj++; 3807 } 3808 /* read in other processors (except the last one) and ship out */ 3809 for (i=1; i<size-1; i++) { 3810 nz = procsnz[i]; 3811 vals = buf; 3812 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3813 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3814 } 3815 /* the last proc */ 3816 if (size != 1) { 3817 nz = procsnz[i] - extra_rows; 3818 vals = buf; 3819 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3820 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3821 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3822 } 3823 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3824 } else { 3825 /* receive numeric values */ 3826 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - the factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

#undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code duplication is
   unfortunate, but it eliminates the extra function-call overhead for direct calls from Fortran.
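   The portable way to perform the same operation is to call MatSetValuesBlocked() itself.  For
   example, to insert a single bs-by-bs block at block row brow and block column bcol (names
   chosen here only for illustration), where vals points to the bs*bs values of the block, one
   would write

.vb
      ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
.ve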
3935 3936 Level: advanced 3937 3938 .seealso: MatSetValuesBlocked() 3939 @*/ 3940 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3941 { 3942 /* convert input arguments to C version */ 3943 Mat mat = *matin; 3944 PetscInt m = *min, n = *nin; 3945 InsertMode addv = *addvin; 3946 3947 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3948 const MatScalar *value; 3949 MatScalar *barray=baij->barray; 3950 PetscBool roworiented = baij->roworiented; 3951 PetscErrorCode ierr; 3952 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3953 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3954 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3955 3956 PetscFunctionBegin; 3957 /* tasks normally handled by MatSetValuesBlocked() */ 3958 if (mat->insertmode == NOT_SET_VALUES) { 3959 mat->insertmode = addv; 3960 } 3961 #if defined(PETSC_USE_DEBUG) 3962 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3963 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3964 #endif 3965 if (mat->assembled) { 3966 mat->was_assembled = PETSC_TRUE; 3967 mat->assembled = PETSC_FALSE; 3968 } 3969 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3970 3971 3972 if (!barray) { 3973 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3974 baij->barray = barray; 3975 } 3976 3977 if (roworiented) { 3978 stepval = (n-1)*bs; 3979 } else { 3980 stepval = (m-1)*bs; 3981 } 3982 for (i=0; i<m; i++) { 3983 if (im[i] < 0) continue; 3984 #if defined(PETSC_USE_DEBUG) 3985 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3986 #endif 3987 if (im[i] >= rstart && im[i] < rend) { 3988 row = im[i] - rstart; 3989 for (j=0; j<n; j++) { 3990 /* If NumCol = 1 then a copy is not required */ 3991 if ((roworiented) && (n == 1)) { 3992 barray = (MatScalar*)v + i*bs2; 3993 } else if ((!roworiented) && (m == 1)) { 3994 barray = (MatScalar*)v + j*bs2; 3995 } else { /* Here a copy is required */ 3996 if (roworiented) { 3997 value = v + i*(stepval+bs)*bs + j*bs; 3998 } else { 3999 value = v + j*(stepval+bs)*bs + i*bs; 4000 } 4001 for (ii=0; ii<bs; ii++,value+=stepval) { 4002 for (jj=0; jj<bs; jj++) { 4003 *barray++ = *value++; 4004 } 4005 } 4006 barray -=bs2; 4007 } 4008 4009 if (in[j] >= cstart && in[j] < cend) { 4010 col = in[j] - cstart; 4011 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4012 } 4013 else if (in[j] < 0) continue; 4014 #if defined(PETSC_USE_DEBUG) 4015 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4016 #endif 4017 else { 4018 if (mat->was_assembled) { 4019 if (!baij->colmap) { 4020 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4021 } 4022 4023 #if defined(PETSC_USE_DEBUG) 4024 #if defined (PETSC_USE_CTABLE) 4025 { PetscInt data; 4026 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4027 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4028 } 4029 #else 4030 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4031 #endif 4032 #endif 4033 #if defined (PETSC_USE_CTABLE) 4034 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4035 col = (col - 1)/bs; 4036 
#else
          col = (baij->colmap[in[j]] - 1)/bs;
#endif
          if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
            col  = in[j];
          }
        }
        else col = in[j];
        ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
      }
    }
  } else {
    if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
      }
    }
  }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns.  This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row offsets; i[r] gives the start of local row r in the j and a arrays
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine.  Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based; i[] holds offsets into the local j[] array.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
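/*
   Example use of MatCreateMPIBAIJWithArrays() (illustrative only; the sizes, indices and values
   below are placeholders).  With bs = 1, a process that owns two rows of a matrix with three
   global columns, namely

      row 0:  1.0 in column 0 and 2.0 in column 2
      row 1:  3.0 in column 1

   would pass

      PetscInt    i[] = {0,2,3};          row offsets into j[] and a[]
      PetscInt    j[] = {0,2,1};          global column indices, sorted within each row
      PetscScalar a[] = {1.0,2.0,3.0};    the corresponding values

      ierr = MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
*/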