#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ 
"MatStoreValues_MPIBAIJ" 70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 71 { 72 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 77 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 EXTERN_C_END 81 82 EXTERN_C_BEGIN 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 86 { 87 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 92 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 /* 98 Local utility routine that creates a mapping from the global column 99 number to the local number in the off-diagonal part of the local 100 storage of the matrix. This is done in a non scable way since the 101 length of colmap equals the global matrix length. 102 */ 103 #undef __FUNCT__ 104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 106 { 107 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 108 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 109 PetscErrorCode ierr; 110 PetscInt nbs = B->nbs,i,bs=mat->bs; 111 112 PetscFunctionBegin; 113 #if defined (PETSC_USE_CTABLE) 114 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 115 for (i=0; i<nbs; i++){ 116 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 117 } 118 #else 119 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 120 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 121 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 122 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 123 #endif 124 PetscFunctionReturn(0); 125 } 126 127 #define CHUNKSIZE 10 128 129 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 130 { \ 131 \ 132 brow = row/bs; \ 133 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 134 rmax = aimax[brow]; nrow = ailen[brow]; \ 135 bcol = col/bs; \ 136 ridx = row % bs; cidx = col % bs; \ 137 low = 0; high = nrow; \ 138 while (high-low > 3) { \ 139 t = (low+high)/2; \ 140 if (rp[t] > bcol) high = t; \ 141 else low = t; \ 142 } \ 143 for (_i=low; _i<high; _i++) { \ 144 if (rp[_i] > bcol) break; \ 145 if (rp[_i] == bcol) { \ 146 bap = ap + bs2*_i + bs*cidx + ridx; \ 147 if (addv == ADD_VALUES) *bap += value; \ 148 else *bap = value; \ 149 goto a_noinsert; \ 150 } \ 151 } \ 152 if (a->nonew == 1) goto a_noinsert; \ 153 else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 154 if (nrow >= rmax) { \ 155 /* there is no extra room in row, therefore enlarge */ \ 156 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 157 MatScalar *new_a; \ 158 \ 159 if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 160 \ 161 /* malloc new storage space */ \ 162 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \ 163 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 164 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 165 new_i = new_j + new_nz; \ 166 \ 167 /* copy over old data into new slots */ \ 168 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 169 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 170 ierr = 
PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 171 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 172 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 173 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 174 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \ 175 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 176 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 177 /* free up old matrix storage */ \ 178 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 179 if (!a->singlemalloc) { \ 180 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 181 ierr = PetscFree(a->j);CHKERRQ(ierr);\ 182 } \ 183 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 184 a->singlemalloc = PETSC_TRUE; \ 185 \ 186 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 187 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 188 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 189 a->maxnz += bs2*CHUNKSIZE; \ 190 a->reallocs++; \ 191 a->nz++; \ 192 } \ 193 N = nrow++ - 1; \ 194 /* shift up all the later entries in this row */ \ 195 for (ii=N; ii>=_i; ii--) { \ 196 rp[ii+1] = rp[ii]; \ 197 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 198 } \ 199 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 200 rp[_i] = bcol; \ 201 ap[bs2*_i + bs*cidx + ridx] = value; \ 202 a_noinsert:; \ 203 ailen[brow] = nrow; \ 204 } 205 206 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 207 { \ 208 brow = row/bs; \ 209 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 210 rmax = bimax[brow]; nrow = bilen[brow]; \ 211 bcol = col/bs; \ 212 ridx = row % bs; cidx = col % bs; \ 213 low = 0; high = nrow; \ 214 while (high-low > 3) { \ 215 t = (low+high)/2; \ 216 if (rp[t] > bcol) high = t; \ 217 else low = t; \ 218 } \ 219 for (_i=low; _i<high; _i++) { \ 220 if (rp[_i] > bcol) break; \ 221 if (rp[_i] == bcol) { \ 222 bap = ap + bs2*_i + bs*cidx + ridx; \ 223 if (addv == ADD_VALUES) *bap += value; \ 224 else *bap = value; \ 225 goto b_noinsert; \ 226 } \ 227 } \ 228 if (b->nonew == 1) goto b_noinsert; \ 229 else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 230 if (nrow >= rmax) { \ 231 /* there is no extra room in row, therefore enlarge */ \ 232 PetscInt new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 233 MatScalar *new_a; \ 234 \ 235 if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 236 \ 237 /* malloc new storage space */ \ 238 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \ 239 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 240 new_j = (PetscInt*)(new_a + bs2*new_nz); \ 241 new_i = new_j + new_nz; \ 242 \ 243 /* copy over old data into new slots */ \ 244 for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 245 for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 246 ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \ 247 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 248 ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \ 249 ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 250 ierr = 
PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 251 ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 252 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 253 /* free up old matrix storage */ \ 254 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 255 if (!b->singlemalloc) { \ 256 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 257 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 258 } \ 259 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 260 b->singlemalloc = PETSC_TRUE; \ 261 \ 262 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 263 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 264 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \ 265 b->maxnz += bs2*CHUNKSIZE; \ 266 b->reallocs++; \ 267 b->nz++; \ 268 } \ 269 N = nrow++ - 1; \ 270 /* shift up all the later entries in this row */ \ 271 for (ii=N; ii>=_i; ii--) { \ 272 rp[ii+1] = rp[ii]; \ 273 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 274 } \ 275 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 276 rp[_i] = bcol; \ 277 ap[bs2*_i + bs*cidx + ridx] = value; \ 278 b_noinsert:; \ 279 bilen[brow] = nrow; \ 280 } 281 282 #if defined(PETSC_USE_MAT_SINGLE) 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatSetValues_MPIBAIJ" 285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 286 { 287 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt i,N = m*n; 290 MatScalar *vsingle; 291 292 PetscFunctionBegin; 293 if (N > b->setvalueslen) { 294 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 295 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 296 b->setvalueslen = N; 297 } 298 vsingle = b->setvaluescopy; 299 300 for (i=0; i<N; i++) { 301 vsingle[i] = v[i]; 302 } 303 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 310 { 311 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt i,N = m*n*b->bs2; 314 MatScalar *vsingle; 315 316 PetscFunctionBegin; 317 if (N > b->setvalueslen) { 318 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 319 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 320 b->setvalueslen = N; 321 } 322 vsingle = b->setvaluescopy; 323 for (i=0; i<N; i++) { 324 vsingle[i] = v[i]; 325 } 326 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 333 { 334 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 335 PetscErrorCode ierr; 336 PetscInt i,N = m*n; 337 MatScalar *vsingle; 338 339 PetscFunctionBegin; 340 if (N > b->setvalueslen) { 341 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 342 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 343 b->setvalueslen = N; 344 } 345 vsingle = b->setvaluescopy; 346 for 
(i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += 
in[j]%bs; 443 } else col = in[j]; 444 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 445 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 446 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 447 } 448 } 449 } else { 450 if (!baij->donotstash) { 451 if (roworiented) { 452 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 453 } else { 454 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 455 } 456 } 457 } 458 } 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar" 464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 const MatScalar *value; 468 MatScalar *barray=baij->barray; 469 PetscTruth roworiented = baij->roworiented; 470 PetscErrorCode ierr; 471 PetscInt i,j,ii,jj,row,col,rstart=baij->rstart; 472 PetscInt rend=baij->rend,cstart=baij->cstart,stepval; 473 PetscInt cend=baij->cend,bs=mat->bs,bs2=baij->bs2; 474 475 PetscFunctionBegin; 476 if(!barray) { 477 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 478 baij->barray = barray; 479 } 480 481 if (roworiented) { 482 stepval = (n-1)*bs; 483 } else { 484 stepval = (m-1)*bs; 485 } 486 for (i=0; i<m; i++) { 487 if (im[i] < 0) continue; 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 490 #endif 491 if (im[i] >= rstart && im[i] < rend) { 492 row = im[i] - rstart; 493 for (j=0; j<n; j++) { 494 /* If NumCol = 1 then a copy is not required */ 495 if ((roworiented) && (n == 1)) { 496 barray = (MatScalar*)v + i*bs2; 497 } else if((!roworiented) && (m == 1)) { 498 barray = (MatScalar*)v + j*bs2; 499 } else { /* Here a copy is required */ 500 if (roworiented) { 501 value = v + i*(stepval+bs)*bs + j*bs; 502 } else { 503 value = v + j*(stepval+bs)*bs + i*bs; 504 } 505 for (ii=0; ii<bs; ii++,value+=stepval) { 506 for (jj=0; jj<bs; jj++) { 507 *barray++ = *value++; 508 } 509 } 510 barray -=bs2; 511 } 512 513 if (in[j] >= cstart && in[j] < cend){ 514 col = in[j] - cstart; 515 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 516 } 517 else if (in[j] < 0) continue; 518 #if defined(PETSC_USE_DEBUG) 519 else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);} 520 #endif 521 else { 522 if (mat->was_assembled) { 523 if (!baij->colmap) { 524 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 525 } 526 527 #if defined(PETSC_USE_DEBUG) 528 #if defined (PETSC_USE_CTABLE) 529 { PetscInt data; 530 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 531 if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 532 } 533 #else 534 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap"); 535 #endif 536 #endif 537 #if defined (PETSC_USE_CTABLE) 538 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 539 col = (col - 1)/bs; 540 #else 541 col = (baij->colmap[in[j]] - 1)/bs; 542 #endif 543 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 544 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 545 col = in[j]; 546 } 547 } 548 else col = in[j]; 549 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 550 } 551 } 552 } 
else { 553 if (!baij->donotstash) { 554 if (roworiented) { 555 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 556 } else { 557 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 PetscFunctionReturn(0); 563 } 564 565 #define HASH_KEY 0.6180339887 566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar" 571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 572 { 573 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 574 PetscTruth roworiented = baij->roworiented; 575 PetscErrorCode ierr; 576 PetscInt i,j,row,col; 577 PetscInt rstart_orig=baij->rstart_bs; 578 PetscInt rend_orig=baij->rend_bs,Nbs=baij->Nbs; 579 PetscInt h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx; 580 PetscReal tmp; 581 MatScalar **HD = baij->hd,value; 582 #if defined(PETSC_USE_DEBUG) 583 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 584 #endif 585 586 PetscFunctionBegin; 587 588 for (i=0; i<m; i++) { 589 #if defined(PETSC_USE_DEBUG) 590 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 591 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 592 #endif 593 row = im[i]; 594 if (row >= rstart_orig && row < rend_orig) { 595 for (j=0; j<n; j++) { 596 col = in[j]; 597 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 598 /* Look up PetscInto the Hash Table */ 599 key = (row/bs)*Nbs+(col/bs)+1; 600 h1 = HASH(size,key,tmp); 601 602 603 idx = h1; 604 #if defined(PETSC_USE_DEBUG) 605 insert_ct++; 606 total_ct++; 607 if (HT[idx] != key) { 608 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 609 if (idx == size) { 610 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 611 if (idx == h1) { 612 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 613 } 614 } 615 } 616 #else 617 if (HT[idx] != key) { 618 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 619 if (idx == size) { 620 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 621 if (idx == h1) { 622 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 623 } 624 } 625 } 626 #endif 627 /* A HASH table entry is found, so insert the values at the correct address */ 628 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 629 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 630 } 631 } else { 632 if (!baij->donotstash) { 633 if (roworiented) { 634 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 635 } else { 636 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 637 } 638 } 639 } 640 } 641 #if defined(PETSC_USE_DEBUG) 642 baij->ht_total_ct = total_ct; 643 baij->ht_insert_ct = insert_ct; 644 #endif 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar" 650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 651 { 652 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 653 PetscTruth 
roworiented = baij->roworiented; 654 PetscErrorCode ierr; 655 PetscInt i,j,ii,jj,row,col; 656 PetscInt rstart=baij->rstart ; 657 PetscInt rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2; 658 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 659 PetscReal tmp; 660 MatScalar **HD = baij->hd,*baij_a; 661 const MatScalar *v_t,*value; 662 #if defined(PETSC_USE_DEBUG) 663 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 664 #endif 665 666 PetscFunctionBegin; 667 668 if (roworiented) { 669 stepval = (n-1)*bs; 670 } else { 671 stepval = (m-1)*bs; 672 } 673 for (i=0; i<m; i++) { 674 #if defined(PETSC_USE_DEBUG) 675 if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 676 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 677 #endif 678 row = im[i]; 679 v_t = v + i*bs2; 680 if (row >= rstart && row < rend) { 681 for (j=0; j<n; j++) { 682 col = in[j]; 683 684 /* Look up into the Hash Table */ 685 key = row*Nbs+col+1; 686 h1 = HASH(size,key,tmp); 687 688 idx = h1; 689 #if defined(PETSC_USE_DEBUG) 690 total_ct++; 691 insert_ct++; 692 if (HT[idx] != key) { 693 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 694 if (idx == size) { 695 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 696 if (idx == h1) { 697 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 698 } 699 } 700 } 701 #else 702 if (HT[idx] != key) { 703 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 704 if (idx == size) { 705 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 706 if (idx == h1) { 707 SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 708 } 709 } 710 } 711 #endif 712 baij_a = HD[idx]; 713 if (roworiented) { 714 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 715 /* value = v + (i*(stepval+bs)+j)*bs; */ 716 value = v_t; 717 v_t += bs; 718 if (addv == ADD_VALUES) { 719 for (ii=0; ii<bs; ii++,value+=stepval) { 720 for (jj=ii; jj<bs2; jj+=bs) { 721 baij_a[jj] += *value++; 722 } 723 } 724 } else { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 baij_a[jj] = *value++; 728 } 729 } 730 } 731 } else { 732 value = v + j*(stepval+bs)*bs + i*bs; 733 if (addv == ADD_VALUES) { 734 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 735 for (jj=0; jj<bs; jj++) { 736 baij_a[jj] += *value++; 737 } 738 } 739 } else { 740 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 741 for (jj=0; jj<bs; jj++) { 742 baij_a[jj] = *value++; 743 } 744 } 745 } 746 } 747 } 748 } else { 749 if (!baij->donotstash) { 750 if (roworiented) { 751 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 752 } else { 753 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 754 } 755 } 756 } 757 } 758 #if defined(PETSC_USE_DEBUG) 759 baij->ht_total_ct = total_ct; 760 baij->ht_insert_ct = insert_ct; 761 #endif 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatGetValues_MPIBAIJ" 767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 768 { 769 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 770 PetscErrorCode ierr; 771 PetscInt bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 772 PetscInt bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 773 774 PetscFunctionBegin; 775 for (i=0; i<m; i++) { 776 if (idxm[i] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 777 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 778 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 779 row = idxm[i] - bsrstart; 780 for (j=0; j<n; j++) { 781 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 782 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 783 if (idxn[j] >= bscstart && idxn[j] < bscend){ 784 col = idxn[j] - bscstart; 785 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 786 } else { 787 if (!baij->colmap) { 788 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 789 } 790 #if defined (PETSC_USE_CTABLE) 791 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 792 data --; 793 #else 794 data = baij->colmap[idxn[j]/bs]-1; 795 #endif 796 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 797 else { 798 col = data + idxn[j]%bs; 799 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 800 } 801 } 802 } 803 } else { 804 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 805 } 806 } 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatNorm_MPIBAIJ" 812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 813 { 814 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 815 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 816 PetscErrorCode ierr; 817 PetscInt i,bs2=baij->bs2; 818 PetscReal sum = 0.0; 819 MatScalar *v; 820 821 PetscFunctionBegin; 822 if (baij->size == 1) { 823 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 824 } else { 825 if (type == NORM_FROBENIUS) { 826 v = amat->a; 827 for (i=0; i<amat->nz*bs2; i++) { 828 #if defined(PETSC_USE_COMPLEX) 829 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 830 #else 831 sum += (*v)*(*v); v++; 832 #endif 833 } 834 v = bmat->a; 835 for (i=0; i<bmat->nz*bs2; i++) { 836 #if defined(PETSC_USE_COMPLEX) 837 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 838 #else 839 sum += (*v)*(*v); v++; 840 #endif 841 } 842 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 843 *nrm = sqrt(*nrm); 844 } else { 845 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 846 } 847 } 848 PetscFunctionReturn(0); 849 } 850 851 852 /* 853 Creates the hash table, and sets the table 854 This table is created only once. 855 If new entried need to be added to the matrix 856 then the hash table has to be destroyed and 857 recreated. 
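
      A sketch of how the table is used (based on MatCreateHashTable_MPIBAIJ_Private()
      and the _HT MatSetValues routines below; illustrative only, not verbatim code):

         key = brow*Nbs + bcol + 1;          every nonzero block gets a nonzero key
         idx = HASH(ht_size,key,tmp);        multiplicative hash with HASH_KEY = 0.618...
         while (HT[idx] != key) idx++;       linear probing; the real search scans to the
                                             end of the table and then wraps around to 0
         HD[idx] is the address of the bs x bs block of values, so the scalar entry
         (row,col) lives at HD[idx][(col%bs)*bs + (row%bs)].

      The table has ht_fact*(a->nz + b->nz) slots, and keys are offset by +1 so that an
      empty slot reads as 0.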
858 */ 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 862 { 863 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 864 Mat A = baij->A,B=baij->B; 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 866 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 867 PetscErrorCode ierr; 868 PetscInt size,bs2=baij->bs2,rstart=baij->rstart; 869 PetscInt cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 870 PetscInt *HT,key; 871 MatScalar **HD; 872 PetscReal tmp; 873 #if defined(PETSC_USE_DEBUG) 874 PetscInt ct=0,max=0; 875 #endif 876 877 PetscFunctionBegin; 878 baij->ht_size=(PetscInt)(factor*nz); 879 size = baij->ht_size; 880 881 if (baij->ht) { 882 PetscFunctionReturn(0); 883 } 884 885 /* Allocate Memory for Hash Table */ 886 ierr = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr); 887 baij->ht = (PetscInt*)(baij->hd + size); 888 HD = baij->hd; 889 HT = baij->ht; 890 891 892 ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr); 893 894 895 /* Loop Over A */ 896 for (i=0; i<a->mbs; i++) { 897 for (j=ai[i]; j<ai[i+1]; j++) { 898 row = i+rstart; 899 col = aj[j]+cstart; 900 901 key = row*Nbs + col + 1; 902 h1 = HASH(size,key,tmp); 903 for (k=0; k<size; k++){ 904 if (!HT[(h1+k)%size]) { 905 HT[(h1+k)%size] = key; 906 HD[(h1+k)%size] = a->a + j*bs2; 907 break; 908 #if defined(PETSC_USE_DEBUG) 909 } else { 910 ct++; 911 #endif 912 } 913 } 914 #if defined(PETSC_USE_DEBUG) 915 if (k> max) max = k; 916 #endif 917 } 918 } 919 /* Loop Over B */ 920 for (i=0; i<b->mbs; i++) { 921 for (j=bi[i]; j<bi[i+1]; j++) { 922 row = i+rstart; 923 col = garray[bj[j]]; 924 key = row*Nbs + col + 1; 925 h1 = HASH(size,key,tmp); 926 for (k=0; k<size; k++){ 927 if (!HT[(h1+k)%size]) { 928 HT[(h1+k)%size] = key; 929 HD[(h1+k)%size] = b->a + j*bs2; 930 break; 931 #if defined(PETSC_USE_DEBUG) 932 } else { 933 ct++; 934 #endif 935 } 936 } 937 #if defined(PETSC_USE_DEBUG) 938 if (k> max) max = k; 939 #endif 940 } 941 } 942 943 /* Print Summary */ 944 #if defined(PETSC_USE_DEBUG) 945 for (i=0,j=0; i<size; i++) { 946 if (HT[i]) {j++;} 947 } 948 PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max); 949 #endif 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 956 { 957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 958 PetscErrorCode ierr; 959 PetscInt nstash,reallocs; 960 InsertMode addv; 961 962 PetscFunctionBegin; 963 if (baij->donotstash) { 964 PetscFunctionReturn(0); 965 } 966 967 /* make sure all processors are either in INSERTMODE or ADDMODE */ 968 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 969 if (addv == (ADD_VALUES|INSERT_VALUES)) { 970 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 971 } 972 mat->insertmode = addv; /* in case this processor had no cache */ 973 974 ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 975 ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 976 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 977 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs); 978 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 979 PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 980 PetscFunctionReturn(0); 981 } 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 986 { 987 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 988 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data; 989 PetscErrorCode ierr; 990 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 991 PetscInt *row,*col,other_disassembled; 992 PetscTruth r1,r2,r3; 993 MatScalar *val; 994 InsertMode addv = mat->insertmode; 995 PetscMPIInt n; 996 997 PetscFunctionBegin; 998 if (!baij->donotstash) { 999 while (1) { 1000 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1001 if (!flg) break; 1002 1003 for (i=0; i<n;) { 1004 /* Now identify the consecutive vals belonging to the same row */ 1005 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1006 if (j < n) ncols = j-i; 1007 else ncols = n-i; 1008 /* Now assemble all these values with a single function call */ 1009 ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 1010 i = j; 1011 } 1012 } 1013 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1014 /* Now process the block-stash. 
Since the values are stashed column-oriented, 1015 set the roworiented flag to column oriented, and after MatSetValues() 1016 restore the original flags */ 1017 r1 = baij->roworiented; 1018 r2 = a->roworiented; 1019 r3 = b->roworiented; 1020 baij->roworiented = PETSC_FALSE; 1021 a->roworiented = PETSC_FALSE; 1022 b->roworiented = PETSC_FALSE; 1023 while (1) { 1024 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1025 if (!flg) break; 1026 1027 for (i=0; i<n;) { 1028 /* Now identify the consecutive vals belonging to the same row */ 1029 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1030 if (j < n) ncols = j-i; 1031 else ncols = n-i; 1032 ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1033 i = j; 1034 } 1035 } 1036 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1037 baij->roworiented = r1; 1038 a->roworiented = r2; 1039 b->roworiented = r3; 1040 } 1041 1042 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1044 1045 /* determine if any processor has disassembled, if so we must 1046 also disassemble ourselfs, in order that we may reassemble. */ 1047 /* 1048 if nonzero structure of submatrix B cannot change then we know that 1049 no processor disassembled thus we can skip this stuff 1050 */ 1051 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1052 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1053 if (mat->was_assembled && !other_disassembled) { 1054 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1055 } 1056 } 1057 1058 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1059 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 1060 } 1061 b->compressedrow.use = PETSC_TRUE; 1062 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1063 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1064 1065 #if defined(PETSC_USE_DEBUG) 1066 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1067 PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct); 1068 baij->ht_total_ct = 0; 1069 baij->ht_insert_ct = 0; 1070 } 1071 #endif 1072 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1073 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1074 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1075 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1076 } 1077 1078 if (baij->rowvalues) { 1079 ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1080 baij->rowvalues = 0; 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 1087 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1088 { 1089 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1090 PetscErrorCode ierr; 1091 PetscMPIInt size = baij->size,rank = baij->rank; 1092 PetscInt bs = mat->bs; 1093 PetscTruth iascii,isdraw; 1094 PetscViewer sviewer; 1095 PetscViewerFormat format; 1096 1097 PetscFunctionBegin; 1098 /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */ 1099 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1100 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1101 if (iascii) { 1102 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1103 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 1104 MatInfo info; 1105 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1106 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1107 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 1108 rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs, 1109 mat->bs,(PetscInt)info.memory);CHKERRQ(ierr); 1110 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1111 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1112 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1113 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr); 1114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1115 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1118 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1121 PetscFunctionReturn(0); 1122 } 1123 } 1124 1125 if (isdraw) { 1126 PetscDraw draw; 1127 PetscTruth isnull; 1128 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1129 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1130 } 1131 1132 if (size == 1) { 1133 ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr); 1134 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1135 } else { 1136 /* assemble the entire matrix onto first processor. */ 1137 Mat A; 1138 Mat_SeqBAIJ *Aloc; 1139 PetscInt M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1140 MatScalar *a; 1141 1142 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1143 /* Perhaps this should be the type of mat? 
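
       Either way, the gathering scheme used below is (a sketch of the existing calls
       in this routine, not new API):

          rank 0:       MatCreate(mat->comm,M,N,M,N,&A);    all rows placed on process 0
          other ranks:  MatCreate(mat->comm,0,0,M,N,&A);
          all ranks:    copy the local diagonal (baij->A) and off-diagonal (baij->B) parts
                        into A with MatSetValues_MPIBAIJ_MatScalar(), translating
                        off-diagonal block columns through garray, then assemble A
          rank 0:       MatView() of ((Mat_MPIBAIJ*)A->data)->A through the singleton viewer

       Since this stores the entire matrix on one process, it is only practical for
       relatively small matrices.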
*/ 1144 if (!rank) { 1145 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 1146 } else { 1147 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 1148 } 1149 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1150 ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1151 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1152 1153 /* copy over the A part */ 1154 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1155 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1156 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1157 1158 for (i=0; i<mbs; i++) { 1159 rvals[0] = bs*(baij->rstart + i); 1160 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1161 for (j=ai[i]; j<ai[i+1]; j++) { 1162 col = (baij->cstart+aj[j])*bs; 1163 for (k=0; k<bs; k++) { 1164 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1165 col++; a += bs; 1166 } 1167 } 1168 } 1169 /* copy over the B part */ 1170 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1171 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1172 for (i=0; i<mbs; i++) { 1173 rvals[0] = bs*(baij->rstart + i); 1174 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1175 for (j=ai[i]; j<ai[i+1]; j++) { 1176 col = baij->garray[aj[j]]*bs; 1177 for (k=0; k<bs; k++) { 1178 ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1179 col++; a += bs; 1180 } 1181 } 1182 } 1183 ierr = PetscFree(rvals);CHKERRQ(ierr); 1184 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1185 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1186 /* 1187 Everyone has to call to draw the matrix since the graphics waits are 1188 synchronized across all processors that share the PetscDraw object 1189 */ 1190 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1191 if (!rank) { 1192 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1193 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1194 } 1195 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1196 ierr = MatDestroy(A);CHKERRQ(ierr); 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatView_MPIBAIJ" 1203 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1204 { 1205 PetscErrorCode ierr; 1206 PetscTruth iascii,isdraw,issocket,isbinary; 1207 1208 PetscFunctionBegin; 1209 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1210 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1211 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1212 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1213 if (iascii || isdraw || issocket || isbinary) { 1214 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1215 } else { 1216 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1223 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1224 { 1225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 #if defined(PETSC_USE_LOG) 1230 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N); 1231 #endif 1232 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1233 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1234 
ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the VecScatterEnd() below, which is true for my */
  /* implementation but is not perhaps always true.
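
      For reference, the overall pattern of this routine is:

         lvec = B^T * xx                                    off-diagonal block, purely local
         VecScatterBegin(lvec -> zz, ADD_VALUES, REVERSE)   start returning those contributions
         zz   = A^T * xx + yy                               diagonal block, overlapped with
                                                            the communication
         VecScatterEnd(lvec -> zz, ADD_VALUES, REVERSE)     add the remote pieces into zz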
*/ 1351 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 /* 1356 This only works correctly for square matrices where the subblock A->A is the 1357 diagonal block 1358 */ 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1361 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1368 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatScale_MPIBAIJ" 1374 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A) 1375 { 1376 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1377 PetscErrorCode ierr; 1378 1379 PetscFunctionBegin; 1380 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1381 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1387 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1388 { 1389 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1390 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1391 PetscErrorCode ierr; 1392 PetscInt bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1393 PetscInt nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1394 PetscInt *cmap,*idx_p,cstart = mat->cstart; 1395 1396 PetscFunctionBegin; 1397 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1398 mat->getrowactive = PETSC_TRUE; 1399 1400 if (!mat->rowvalues && (idx || v)) { 1401 /* 1402 allocate enough space to hold information from the longest row. 
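
         The longest block row is found by scanning the row pointers of both the
         diagonal (A) and off-diagonal (B) blocks:

            max = max over i of  (Aa->i[i+1]-Aa->i[i]) + (Ba->i[i+1]-Ba->i[i])

         and a single PetscMalloc() of max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar))
         bytes then holds both work buffers: rowvalues points at the scalars and
         rowindices is aliased to the PetscInt tail of the same allocation.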
1403 */ 1404 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1405 PetscInt max = 1,mbs = mat->mbs,tmp; 1406 for (i=0; i<mbs; i++) { 1407 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1408 if (max < tmp) { max = tmp; } 1409 } 1410 ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1411 mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2); 1412 } 1413 1414 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows") 1415 lrow = row - brstart; 1416 1417 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1418 if (!v) {pvA = 0; pvB = 0;} 1419 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1420 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1421 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1422 nztot = nzA + nzB; 1423 1424 cmap = mat->garray; 1425 if (v || idx) { 1426 if (nztot) { 1427 /* Sort by increasing column numbers, assuming A and B already sorted */ 1428 PetscInt imark = -1; 1429 if (v) { 1430 *v = v_p = mat->rowvalues; 1431 for (i=0; i<nzB; i++) { 1432 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1433 else break; 1434 } 1435 imark = i; 1436 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1437 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1438 } 1439 if (idx) { 1440 *idx = idx_p = mat->rowindices; 1441 if (imark > -1) { 1442 for (i=0; i<imark; i++) { 1443 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1444 } 1445 } else { 1446 for (i=0; i<nzB; i++) { 1447 if (cmap[cworkB[i]/bs] < cstart) 1448 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1449 else break; 1450 } 1451 imark = i; 1452 } 1453 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1454 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1455 } 1456 } else { 1457 if (idx) *idx = 0; 1458 if (v) *v = 0; 1459 } 1460 } 1461 *nz = nztot; 1462 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1463 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1469 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1470 { 1471 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1472 1473 PetscFunctionBegin; 1474 if (!baij->getrowactive) { 1475 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1476 } 1477 baij->getrowactive = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1483 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1484 { 1485 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1490 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1496 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1497 { 1498 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1499 Mat A = a->A,B = a->B; 1500 PetscErrorCode ierr; 1501 PetscReal isend[5],irecv[5]; 1502 1503 PetscFunctionBegin; 1504 info->block_size = (PetscReal)matin->bs; 1505 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1506 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1507 isend[3] = info->memory; isend[4] = info->mallocs; 1508 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1509 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1510 isend[3] += info->memory; isend[4] += info->mallocs; 1511 if (flag == MAT_LOCAL) { 1512 info->nz_used = isend[0]; 1513 info->nz_allocated = isend[1]; 1514 info->nz_unneeded = isend[2]; 1515 info->memory = isend[3]; 1516 info->mallocs = isend[4]; 1517 } else if (flag == MAT_GLOBAL_MAX) { 1518 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1519 info->nz_used = irecv[0]; 1520 info->nz_allocated = irecv[1]; 1521 info->nz_unneeded = irecv[2]; 1522 info->memory = irecv[3]; 1523 info->mallocs = irecv[4]; 1524 } else if (flag == MAT_GLOBAL_SUM) { 1525 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1526 info->nz_used = irecv[0]; 1527 info->nz_allocated = irecv[1]; 1528 info->nz_unneeded = irecv[2]; 1529 info->memory = irecv[3]; 1530 info->mallocs = irecv[4]; 1531 } else { 1532 SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1533 } 1534 info->rows_global = (PetscReal)A->M; 1535 info->columns_global = (PetscReal)A->N; 1536 info->rows_local = (PetscReal)A->m; 1537 info->columns_local = (PetscReal)A->N; 1538 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1539 info->fill_ratio_needed = 0; 1540 info->factor_mallocs = 0; 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1546 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op) 1547 { 1548 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 switch (op) { 1553 case MAT_NO_NEW_NONZERO_LOCATIONS: 1554 case MAT_YES_NEW_NONZERO_LOCATIONS: 1555 case MAT_COLUMNS_UNSORTED: 1556 case MAT_COLUMNS_SORTED: 1557 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1558 case MAT_KEEP_ZEROED_ROWS: 1559 case MAT_NEW_NONZERO_LOCATION_ERR: 1560 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1561 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1562 break; 1563 case MAT_ROW_ORIENTED: 1564 a->roworiented = PETSC_TRUE; 1565 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1566 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1567 break; 1568 case MAT_ROWS_SORTED: 1569 case MAT_ROWS_UNSORTED: 1570 case MAT_YES_NEW_DIAGONALS: 1571 PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1572 break; 1573 case MAT_COLUMN_ORIENTED: 1574 a->roworiented = PETSC_FALSE; 1575 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1576 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1577 break; 1578 case MAT_IGNORE_OFF_PROC_ENTRIES: 1579 a->donotstash = PETSC_TRUE; 1580 break; 1581 case MAT_NO_NEW_DIAGONALS: 1582 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1583 case MAT_USE_HASH_TABLE: 1584 a->ht_flag = PETSC_TRUE; 1585 break; 1586 case MAT_SYMMETRIC: 1587 case MAT_STRUCTURALLY_SYMMETRIC: 1588 case MAT_HERMITIAN: 1589 case MAT_SYMMETRY_ETERNAL: 1590 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1591 break; 1592 case MAT_NOT_SYMMETRIC: 1593 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1594 case MAT_NOT_HERMITIAN: 1595 case MAT_NOT_SYMMETRY_ETERNAL: 1596 break; 1597 default: 1598 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatTranspose_MPIBAIJ(" 1605 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1606 { 1607 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1608 Mat_SeqBAIJ *Aloc; 1609 Mat B; 1610 PetscErrorCode ierr; 1611 PetscInt M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col; 1612 PetscInt 
bs=A->bs,mbs=baij->mbs; 1613 MatScalar *a; 1614 1615 PetscFunctionBegin; 1616 if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1617 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1618 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1619 ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1620 1621 /* copy over the A part */ 1622 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1623 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1624 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1625 1626 for (i=0; i<mbs; i++) { 1627 rvals[0] = bs*(baij->rstart + i); 1628 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1629 for (j=ai[i]; j<ai[i+1]; j++) { 1630 col = (baij->cstart+aj[j])*bs; 1631 for (k=0; k<bs; k++) { 1632 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1633 col++; a += bs; 1634 } 1635 } 1636 } 1637 /* copy over the B part */ 1638 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1639 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1640 for (i=0; i<mbs; i++) { 1641 rvals[0] = bs*(baij->rstart + i); 1642 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1643 for (j=ai[i]; j<ai[i+1]; j++) { 1644 col = baij->garray[aj[j]]*bs; 1645 for (k=0; k<bs; k++) { 1646 ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1647 col++; a += bs; 1648 } 1649 } 1650 } 1651 ierr = PetscFree(rvals);CHKERRQ(ierr); 1652 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1654 1655 if (matout) { 1656 *matout = B; 1657 } else { 1658 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1665 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1666 { 1667 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1668 Mat a = baij->A,b = baij->B; 1669 PetscErrorCode ierr; 1670 PetscInt s1,s2,s3; 1671 1672 PetscFunctionBegin; 1673 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1674 if (rr) { 1675 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1676 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1677 /* Overlap communication with computation. 
*/ 1678 ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1679 } 1680 if (ll) { 1681 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1682 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1683 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1684 } 1685 /* scale the diagonal block */ 1686 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1687 1688 if (rr) { 1689 /* Do a scatter end and then right scale the off-diagonal block */ 1690 ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1691 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1692 } 1693 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1699 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag) 1700 { 1701 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1704 PetscInt i,N,*rows,*owners = l->rowners; 1705 PetscInt *nprocs,j,idx,nsends,row; 1706 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1707 PetscInt *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1; 1708 PetscInt *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs; 1709 MPI_Comm comm = A->comm; 1710 MPI_Request *send_waits,*recv_waits; 1711 MPI_Status recv_status,*send_status; 1712 IS istmp; 1713 #if defined(PETSC_DEBUG) 1714 PetscTruth found = PETSC_FALSE; 1715 #endif 1716 1717 PetscFunctionBegin; 1718 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 1719 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1720 1721 /* first count number of contributors to each processor */ 1722 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1723 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1724 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1725 j = 0; 1726 for (i=0; i<N; i++) { 1727 if (lastidx > (idx = rows[i])) j = 0; 1728 lastidx = idx; 1729 for (; j<size; j++) { 1730 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1731 nprocs[2*j]++; 1732 nprocs[2*j+1] = 1; 1733 owner[i] = j; 1734 #if defined(PETSC_DEBUG) 1735 found = PETSC_TRUE; 1736 #endif 1737 break; 1738 } 1739 } 1740 #if defined(PETSC_DEBUG) 1741 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1742 found = PETSC_FALSE; 1743 #endif 1744 } 1745 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1746 1747 /* inform other processors of number of messages and max length*/ 1748 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1749 1750 /* post receives: */ 1751 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1752 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1753 for (i=0; i<nrecvs; i++) { 1754 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1755 } 1756 1757 /* do sends: 1758 1) starts[i] gives the starting index in svalues for stuff going to 1759 the ith processor 1760 */ 1761 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1762 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1763 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1764 starts[0] = 0; 1765 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1766 for (i=0; i<N; i++) { 1767 svalues[starts[owner[i]]++] = rows[i]; 1768 } 1769 ierr = 
ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1770 1771 starts[0] = 0; 1772 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1773 count = 0; 1774 for (i=0; i<size; i++) { 1775 if (nprocs[2*i+1]) { 1776 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1777 } 1778 } 1779 ierr = PetscFree(starts);CHKERRQ(ierr); 1780 1781 base = owners[rank]*bs; 1782 1783 /* wait on receives */ 1784 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1785 source = lens + nrecvs; 1786 count = nrecvs; slen = 0; 1787 while (count) { 1788 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1789 /* unpack receives into our local space */ 1790 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1791 source[imdex] = recv_status.MPI_SOURCE; 1792 lens[imdex] = n; 1793 slen += n; 1794 count--; 1795 } 1796 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1797 1798 /* move the data into the send scatter */ 1799 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1800 count = 0; 1801 for (i=0; i<nrecvs; i++) { 1802 values = rvalues + i*nmax; 1803 for (j=0; j<lens[i]; j++) { 1804 lrows[count++] = values[j] - base; 1805 } 1806 } 1807 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1808 ierr = PetscFree(lens);CHKERRQ(ierr); 1809 ierr = PetscFree(owner);CHKERRQ(ierr); 1810 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1811 1812 /* actually zap the local rows */ 1813 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1814 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 1815 1816 /* 1817 Zero the required rows. If the "diagonal block" of the matrix 1818 is square and the user wishes to set the diagonal we use seperate 1819 code so that MatSetValues() is not called for each diagonal allocating 1820 new memory, thus calling lots of mallocs and slowing things down. 
1821 1822 Contributed by: Mathew Knepley 1823 */ 1824 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1825 ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1826 if (diag && (l->A->M == l->A->N)) { 1827 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1828 } else if (diag) { 1829 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1830 if (((Mat_SeqBAIJ*)l->A->data)->nonew) { 1831 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1832 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1833 } 1834 for (i=0; i<slen; i++) { 1835 row = lrows[i] + rstart_bs; 1836 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1837 } 1838 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1839 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1840 } else { 1841 ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1842 } 1843 1844 ierr = ISDestroy(istmp);CHKERRQ(ierr); 1845 ierr = PetscFree(lrows);CHKERRQ(ierr); 1846 1847 /* wait on sends */ 1848 if (nsends) { 1849 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1850 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1851 ierr = PetscFree(send_status);CHKERRQ(ierr); 1852 } 1853 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1854 ierr = PetscFree(svalues);CHKERRQ(ierr); 1855 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatPrintHelp_MPIBAIJ" 1861 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A) 1862 { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1864 MPI_Comm comm = A->comm; 1865 static PetscTruth called = PETSC_FALSE; 1866 PetscErrorCode ierr; 1867 1868 PetscFunctionBegin; 1869 if (!a->rank) { 1870 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1871 } 1872 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1873 ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1874 ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1880 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1881 { 1882 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatEqual_MPIBAIJ" 1894 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag) 1895 { 1896 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1897 Mat a,b,c,d; 1898 PetscTruth flg; 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 a = matA->A; b = matA->B; 1903 c = matB->A; d = matB->B; 1904 1905 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1906 if (flg) { 1907 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1908 } 1909 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1916 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1917 { 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1922 
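  /* The call above is the fallback taken when the user never preallocated this matrix:
     block size 1 with PETSC_DEFAULT nonzero estimates, which
     MatMPIBAIJSetPreallocation_MPIBAIJ() resolves to its built-in per-row defaults. */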
PetscFunctionReturn(0); 1923 } 1924 1925 /* -------------------------------------------------------------------*/ 1926 static struct _MatOps MatOps_Values = { 1927 MatSetValues_MPIBAIJ, 1928 MatGetRow_MPIBAIJ, 1929 MatRestoreRow_MPIBAIJ, 1930 MatMult_MPIBAIJ, 1931 /* 4*/ MatMultAdd_MPIBAIJ, 1932 MatMultTranspose_MPIBAIJ, 1933 MatMultTransposeAdd_MPIBAIJ, 1934 0, 1935 0, 1936 0, 1937 /*10*/ 0, 1938 0, 1939 0, 1940 0, 1941 MatTranspose_MPIBAIJ, 1942 /*15*/ MatGetInfo_MPIBAIJ, 1943 MatEqual_MPIBAIJ, 1944 MatGetDiagonal_MPIBAIJ, 1945 MatDiagonalScale_MPIBAIJ, 1946 MatNorm_MPIBAIJ, 1947 /*20*/ MatAssemblyBegin_MPIBAIJ, 1948 MatAssemblyEnd_MPIBAIJ, 1949 0, 1950 MatSetOption_MPIBAIJ, 1951 MatZeroEntries_MPIBAIJ, 1952 /*25*/ MatZeroRows_MPIBAIJ, 1953 0, 1954 0, 1955 0, 1956 0, 1957 /*30*/ MatSetUpPreallocation_MPIBAIJ, 1958 0, 1959 0, 1960 0, 1961 0, 1962 /*35*/ MatDuplicate_MPIBAIJ, 1963 0, 1964 0, 1965 0, 1966 0, 1967 /*40*/ 0, 1968 MatGetSubMatrices_MPIBAIJ, 1969 MatIncreaseOverlap_MPIBAIJ, 1970 MatGetValues_MPIBAIJ, 1971 0, 1972 /*45*/ MatPrintHelp_MPIBAIJ, 1973 MatScale_MPIBAIJ, 1974 0, 1975 0, 1976 0, 1977 /*50*/ 0, 1978 0, 1979 0, 1980 0, 1981 0, 1982 /*55*/ 0, 1983 0, 1984 MatSetUnfactored_MPIBAIJ, 1985 0, 1986 MatSetValuesBlocked_MPIBAIJ, 1987 /*60*/ 0, 1988 MatDestroy_MPIBAIJ, 1989 MatView_MPIBAIJ, 1990 MatGetPetscMaps_Petsc, 1991 0, 1992 /*65*/ 0, 1993 0, 1994 0, 1995 0, 1996 0, 1997 /*70*/ MatGetRowMax_MPIBAIJ, 1998 0, 1999 0, 2000 0, 2001 0, 2002 /*75*/ 0, 2003 0, 2004 0, 2005 0, 2006 0, 2007 /*80*/ 0, 2008 0, 2009 0, 2010 0, 2011 MatLoad_MPIBAIJ, 2012 /*85*/ 0, 2013 0, 2014 0, 2015 0, 2016 0, 2017 /*90*/ 0, 2018 0, 2019 0, 2020 0, 2021 0, 2022 /*95*/ 0, 2023 0, 2024 0, 2025 0}; 2026 2027 2028 EXTERN_C_BEGIN 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2031 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2032 { 2033 PetscFunctionBegin; 2034 *a = ((Mat_MPIBAIJ *)A->data)->A; 2035 *iscopy = PETSC_FALSE; 2036 PetscFunctionReturn(0); 2037 } 2038 EXTERN_C_END 2039 2040 EXTERN_C_BEGIN 2041 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 2042 EXTERN_C_END 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2046 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2047 { 2048 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2049 PetscInt m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2050 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2051 const PetscInt *JJ; 2052 PetscScalar *values; 2053 PetscErrorCode ierr; 2054 2055 PetscFunctionBegin; 2056 #if defined(PETSC_OPT_g) 2057 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2058 #endif 2059 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2060 o_nnz = d_nnz + m; 2061 2062 for (i=0; i<m; i++) { 2063 nnz = I[i+1]- I[i]; 2064 JJ = J + I[i]; 2065 nnz_max = PetscMax(nnz_max,nnz); 2066 #if defined(PETSC_OPT_g) 2067 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2068 #endif 2069 for (j=0; j<nnz; j++) { 2070 if (*JJ >= cstart) break; 2071 JJ++; 2072 } 2073 d = 0; 2074 for (; j<nnz; j++) { 2075 if (*JJ++ >= cend) break; 2076 d++; 2077 } 2078 d_nnz[i] = d; 2079 o_nnz[i] = nnz - d; 2080 } 2081 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2082 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2083 2084 if (v) values = 
(PetscScalar*)v;
  else {
    ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = I[i+1]- I[i];
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row), given the block size and the compressed-row layout of the local block rows.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr =
PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 2171 2172 b = (Mat_MPIBAIJ*)B->data; 2173 B->bs = bs; 2174 b->bs2 = bs*bs; 2175 b->mbs = B->m/bs; 2176 b->nbs = B->n/bs; 2177 b->Mbs = B->M/bs; 2178 b->Nbs = B->N/bs; 2179 2180 ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2181 b->rowners[0] = 0; 2182 for (i=2; i<=b->size; i++) { 2183 b->rowners[i] += b->rowners[i-1]; 2184 } 2185 b->rstart = b->rowners[b->rank]; 2186 b->rend = b->rowners[b->rank+1]; 2187 2188 ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 2189 b->cowners[0] = 0; 2190 for (i=2; i<=b->size; i++) { 2191 b->cowners[i] += b->cowners[i-1]; 2192 } 2193 b->cstart = b->cowners[b->rank]; 2194 b->cend = b->cowners[b->rank+1]; 2195 2196 for (i=0; i<=b->size; i++) { 2197 b->rowners_bs[i] = b->rowners[i]*bs; 2198 } 2199 b->rstart_bs = b->rstart*bs; 2200 b->rend_bs = b->rend*bs; 2201 b->cstart_bs = b->cstart*bs; 2202 b->cend_bs = b->cend*bs; 2203 2204 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 2205 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2206 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2207 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 2208 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 2209 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2210 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2211 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 2212 2213 ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2214 2215 PetscFunctionReturn(0); 2216 } 2217 EXTERN_C_END 2218 2219 EXTERN_C_BEGIN 2220 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2221 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2222 EXTERN_C_END 2223 2224 /*MC 2225 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2226 2227 Options Database Keys: 2228 . 
-mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2229 2230 Level: beginner 2231 2232 .seealso: MatCreateMPIBAIJ 2233 M*/ 2234 2235 EXTERN_C_BEGIN 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatCreate_MPIBAIJ" 2238 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2239 { 2240 Mat_MPIBAIJ *b; 2241 PetscErrorCode ierr; 2242 PetscTruth flg; 2243 2244 PetscFunctionBegin; 2245 ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr); 2246 B->data = (void*)b; 2247 2248 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2249 B->mapping = 0; 2250 B->factor = 0; 2251 B->assembled = PETSC_FALSE; 2252 2253 B->insertmode = NOT_SET_VALUES; 2254 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 2255 ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr); 2256 2257 /* build local table of row and column ownerships */ 2258 ierr = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 2259 ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr); 2260 b->cowners = b->rowners + b->size + 2; 2261 b->rowners_bs = b->cowners + b->size + 2; 2262 2263 /* build cache for off array entries formed */ 2264 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2265 b->donotstash = PETSC_FALSE; 2266 b->colmap = PETSC_NULL; 2267 b->garray = PETSC_NULL; 2268 b->roworiented = PETSC_TRUE; 2269 2270 #if defined(PETSC_USE_MAT_SINGLE) 2271 /* stuff for MatSetValues_XXX in single precision */ 2272 b->setvalueslen = 0; 2273 b->setvaluescopy = PETSC_NULL; 2274 #endif 2275 2276 /* stuff used in block assembly */ 2277 b->barray = 0; 2278 2279 /* stuff used for matrix vector multiply */ 2280 b->lvec = 0; 2281 b->Mvctx = 0; 2282 2283 /* stuff for MatGetRow() */ 2284 b->rowindices = 0; 2285 b->rowvalues = 0; 2286 b->getrowactive = PETSC_FALSE; 2287 2288 /* hash table stuff */ 2289 b->ht = 0; 2290 b->hd = 0; 2291 b->ht_size = 0; 2292 b->ht_flag = PETSC_FALSE; 2293 b->ht_fact = 0; 2294 b->ht_total_ct = 0; 2295 b->ht_insert_ct = 0; 2296 2297 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2298 if (flg) { 2299 PetscReal fact = 1.39; 2300 ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2301 ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2302 if (fact <= 1.0) fact = 1.39; 2303 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2304 PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact); 2305 } 2306 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2307 "MatStoreValues_MPIBAIJ", 2308 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2309 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2310 "MatRetrieveValues_MPIBAIJ", 2311 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2312 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2313 "MatGetDiagonalBlock_MPIBAIJ", 2314 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 2315 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 2316 "MatMPIBAIJSetPreallocation_MPIBAIJ", 2317 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2318 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 2319 "MatMPIBAIJSetPreallocationCSR_MPIAIJ", 2320 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2321 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 2322 "MatDiagonalScaleLocal_MPIBAIJ", 2323 
MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_BAIJ"
PetscErrorCode MatCreate_BAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each
           block row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
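   Example:
   A minimal calling sequence is sketched below; it assumes comm and the sizes m, n, M, N
   are already defined by the caller, and the block size 2 and the per-block-row counts
   3 and 1 are illustrative placeholders, not recommendations.

.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve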
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each
           block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
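   Example:
   As an illustrative sketch (it assumes comm, M and N are defined by the caller; the
   block size 2 and the per-block-row counts 3 and 1 are placeholders, not recommendations),
   a matrix with global dimensions M by N could be created with

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve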
2542 2543 Level: intermediate 2544 2545 .keywords: matrix, block, aij, compressed row, sparse, parallel 2546 2547 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2548 @*/ 2549 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2550 { 2551 PetscErrorCode ierr; 2552 PetscMPIInt size; 2553 2554 PetscFunctionBegin; 2555 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2556 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557 if (size > 1) { 2558 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 2559 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2560 } else { 2561 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2562 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 2569 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2570 { 2571 Mat mat; 2572 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 2573 PetscErrorCode ierr; 2574 PetscInt len=0; 2575 2576 PetscFunctionBegin; 2577 *newmat = 0; 2578 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 2579 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 2580 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2581 2582 mat->factor = matin->factor; 2583 mat->preallocated = PETSC_TRUE; 2584 mat->assembled = PETSC_TRUE; 2585 mat->insertmode = NOT_SET_VALUES; 2586 2587 a = (Mat_MPIBAIJ*)mat->data; 2588 mat->bs = matin->bs; 2589 a->bs2 = oldmat->bs2; 2590 a->mbs = oldmat->mbs; 2591 a->nbs = oldmat->nbs; 2592 a->Mbs = oldmat->Mbs; 2593 a->Nbs = oldmat->Nbs; 2594 2595 a->rstart = oldmat->rstart; 2596 a->rend = oldmat->rend; 2597 a->cstart = oldmat->cstart; 2598 a->cend = oldmat->cend; 2599 a->size = oldmat->size; 2600 a->rank = oldmat->rank; 2601 a->donotstash = oldmat->donotstash; 2602 a->roworiented = oldmat->roworiented; 2603 a->rowindices = 0; 2604 a->rowvalues = 0; 2605 a->getrowactive = PETSC_FALSE; 2606 a->barray = 0; 2607 a->rstart_bs = oldmat->rstart_bs; 2608 a->rend_bs = oldmat->rend_bs; 2609 a->cstart_bs = oldmat->cstart_bs; 2610 a->cend_bs = oldmat->cend_bs; 2611 2612 /* hash table stuff */ 2613 a->ht = 0; 2614 a->hd = 0; 2615 a->ht_size = 0; 2616 a->ht_flag = oldmat->ht_flag; 2617 a->ht_fact = oldmat->ht_fact; 2618 a->ht_total_ct = 0; 2619 a->ht_insert_ct = 0; 2620 2621 ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2622 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2623 ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr); 2624 if (oldmat->colmap) { 2625 #if defined (PETSC_USE_CTABLE) 2626 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2627 #else 2628 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2629 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2630 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2631 #endif 2632 } else a->colmap = 0; 2633 2634 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2635 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2636 ierr = 
PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2637 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2638 } else a->garray = 0; 2639 2640 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2641 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2642 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2643 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2644 2645 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2646 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2647 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2648 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2649 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2650 *newmat = mat; 2651 2652 PetscFunctionReturn(0); 2653 } 2654 2655 #include "petscsys.h" 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatLoad_MPIBAIJ" 2659 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 2660 { 2661 Mat A; 2662 PetscErrorCode ierr; 2663 int fd; 2664 PetscInt i,nz,j,rstart,rend; 2665 PetscScalar *vals,*buf; 2666 MPI_Comm comm = ((PetscObject)viewer)->comm; 2667 MPI_Status status; 2668 PetscMPIInt rank,size,maxnz; 2669 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 2670 PetscInt *locrowlens,*procsnz = 0,*browners; 2671 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 2672 PetscMPIInt tag = ((PetscObject)viewer)->tag; 2673 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2674 PetscInt dcount,kmax,k,nzcount,tmp,mend; 2675 2676 PetscFunctionBegin; 2677 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2678 2679 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2680 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2681 if (!rank) { 2682 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2683 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2684 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2685 } 2686 2687 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2688 M = header[1]; N = header[2]; 2689 2690 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2691 2692 /* 2693 This code adds extra rows to make sure the number of rows is 2694 divisible by the blocksize 2695 */ 2696 Mbs = M/bs; 2697 extra_rows = bs - M + bs*Mbs; 2698 if (extra_rows == bs) extra_rows = 0; 2699 else Mbs++; 2700 if (extra_rows && !rank) { 2701 PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2702 } 2703 2704 /* determine ownership of all rows */ 2705 mbs = Mbs/size + ((Mbs % size) > rank); 2706 m = mbs*bs; 2707 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 2708 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2709 2710 /* process 0 needs enough room for process with most rows */ 2711 if (!rank) { 2712 mmax = rowners[1]; 2713 for (i=2; i<size; i++) { 2714 mmax = PetscMax(mmax,rowners[i]); 2715 } 2716 } else mmax = m; 2717 2718 rowners[0] = 0; 2719 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2720 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2721 rstart = rowners[rank]; 2722 rend = rowners[rank+1]; 2723 2724 /* distribute row lengths to all processors */ 2725 ierr = PetscMalloc(mmax*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2726 if (!rank) { 2727 mend = m; 2728 if (size == 1) mend = mend - extra_rows; 2729 ierr = 
PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 2730 for (j=mend; j<m; j++) locrowlens[j] = 1; 2731 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2732 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2733 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2734 for (j=0; j<m; j++) { 2735 procsnz[0] += locrowlens[j]; 2736 } 2737 for (i=1; i<size; i++) { 2738 mend = browners[i+1] - browners[i]; 2739 if (i == size-1) mend = mend - extra_rows; 2740 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 2741 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 2742 /* calculate the number of nonzeros on each processor */ 2743 for (j=0; j<browners[i+1]-browners[i]; j++) { 2744 procsnz[i] += rowlengths[j]; 2745 } 2746 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2747 } 2748 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2749 } else { 2750 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2751 } 2752 2753 if (!rank) { 2754 /* determine max buffer needed and allocate it */ 2755 maxnz = procsnz[0]; 2756 for (i=1; i<size; i++) { 2757 maxnz = PetscMax(maxnz,procsnz[i]); 2758 } 2759 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2760 2761 /* read in my part of the matrix column indices */ 2762 nz = procsnz[0]; 2763 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2764 mycols = ibuf; 2765 if (size == 1) nz -= extra_rows; 2766 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2767 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2768 2769 /* read in every ones (except the last) and ship off */ 2770 for (i=1; i<size-1; i++) { 2771 nz = procsnz[i]; 2772 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2773 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2774 } 2775 /* read in the stuff for the last proc */ 2776 if (size != 1) { 2777 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2778 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2779 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2780 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2781 } 2782 ierr = PetscFree(cols);CHKERRQ(ierr); 2783 } else { 2784 /* determine buffer space needed for message */ 2785 nz = 0; 2786 for (i=0; i<m; i++) { 2787 nz += locrowlens[i]; 2788 } 2789 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2790 mycols = ibuf; 2791 /* receive message of column indices*/ 2792 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2793 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2794 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2795 } 2796 2797 /* loop over local rows, determining number of off diagonal entries */ 2798 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2799 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2800 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2801 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2802 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2803 rowcount = 0; nzcount = 0; 2804 for (i=0; i<mbs; i++) { 2805 dcount = 0; 2806 odcount = 0; 2807 for (j=0; j<bs; j++) { 2808 kmax = locrowlens[rowcount]; 2809 for (k=0; k<kmax; k++) { 2810 tmp = mycols[nzcount++]/bs; 2811 if (!mask[tmp]) { 
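          /* first time this block column appears in the current block row: mark it and
             count it as diagonal (masked1) or off-diagonal (masked2) depending on whether
             it lies in this process's ownership range */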
2812 mask[tmp] = 1; 2813 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2814 else masked1[dcount++] = tmp; 2815 } 2816 } 2817 rowcount++; 2818 } 2819 2820 dlens[i] = dcount; 2821 odlens[i] = odcount; 2822 2823 /* zero out the mask elements we set */ 2824 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2825 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2826 } 2827 2828 /* create our matrix */ 2829 ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr); 2830 ierr = MatSetType(A,type);CHKERRQ(ierr) 2831 ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2832 2833 /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */ 2834 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2835 2836 if (!rank) { 2837 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2838 /* read in my part of the matrix numerical values */ 2839 nz = procsnz[0]; 2840 vals = buf; 2841 mycols = ibuf; 2842 if (size == 1) nz -= extra_rows; 2843 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2844 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2845 2846 /* insert into matrix */ 2847 jj = rstart*bs; 2848 for (i=0; i<m; i++) { 2849 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2850 mycols += locrowlens[i]; 2851 vals += locrowlens[i]; 2852 jj++; 2853 } 2854 /* read in other processors (except the last one) and ship out */ 2855 for (i=1; i<size-1; i++) { 2856 nz = procsnz[i]; 2857 vals = buf; 2858 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2859 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2860 } 2861 /* the last proc */ 2862 if (size != 1){ 2863 nz = procsnz[i] - extra_rows; 2864 vals = buf; 2865 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2866 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2867 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2868 } 2869 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2870 } else { 2871 /* receive numeric values */ 2872 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2873 2874 /* receive message of values*/ 2875 vals = buf; 2876 mycols = ibuf; 2877 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2878 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2879 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2880 2881 /* insert into matrix */ 2882 jj = rstart*bs; 2883 for (i=0; i<m; i++) { 2884 ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2885 mycols += locrowlens[i]; 2886 vals += locrowlens[i]; 2887 jj++; 2888 } 2889 } 2890 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2891 ierr = PetscFree(buf);CHKERRQ(ierr); 2892 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2893 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2894 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2895 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2896 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2897 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2898 2899 *newmat = A; 2900 PetscFunctionReturn(0); 2901 } 2902 2903 #undef __FUNCT__ 2904 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 2905 /*@ 2906 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2907 2908 Input Parameters: 2909 . mat - the matrix 2910 . 
fact - factor 2911 2912 Collective on Mat 2913 2914 Level: advanced 2915 2916 Notes: 2917 This can also be set by the command line option: -mat_use_hash_table fact 2918 2919 .keywords: matrix, hashtable, factor, HT 2920 2921 .seealso: MatSetOption() 2922 @*/ 2923 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2924 { 2925 PetscErrorCode ierr,(*f)(Mat,PetscReal); 2926 2927 PetscFunctionBegin; 2928 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr); 2929 if (f) { 2930 ierr = (*f)(mat,fact);CHKERRQ(ierr); 2931 } 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 2937 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 2938 { 2939 Mat_MPIBAIJ *baij; 2940 2941 PetscFunctionBegin; 2942 baij = (Mat_MPIBAIJ*)mat->data; 2943 baij->ht_fact = fact; 2944 PetscFunctionReturn(0); 2945 } 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 2949 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2950 { 2951 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2952 PetscFunctionBegin; 2953 *Ad = a->A; 2954 *Ao = a->B; 2955 *colmap = a->garray; 2956 PetscFunctionReturn(0); 2957 } 2958
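/*
   Illustrative sketch (not part of the library source) of how MatMPIBAIJGetSeqBAIJ() is
   intended to be used: the caller obtains the sequential "diagonal" and "off-diagonal"
   blocks of an MPIBAIJ matrix plus the map from local block columns of Ao to global
   block columns.

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      (the global block column of local block column j of Ao is colmap[j])

   Ad and Ao are references to the matrix's internal data, not copies, so the caller
   must not destroy them.
*/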