1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscErrorCode ierr; 15 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 16 PetscScalar *va,*vv; 17 Vec vB,vA; 18 const PetscScalar *vb; 19 20 PetscFunctionBegin; 21 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 22 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 23 24 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 25 if (idx) { 26 for (i=0; i<m; i++) { 27 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 28 } 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 32 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 33 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 34 35 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 36 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 37 for (i=0; i<m; i++) { 38 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 39 vv[i] = vb[i]; 40 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 41 } else { 42 vv[i] = va[i]; 43 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 44 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 45 } 46 } 47 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 48 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 49 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 50 ierr = PetscFree(idxb);CHKERRQ(ierr); 51 ierr = VecDestroy(&vA);CHKERRQ(ierr); 52 ierr = VecDestroy(&vB);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 57 { 58 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 59 PetscErrorCode ierr; 60 61 PetscFunctionBegin; 62 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 63 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 68 { 69 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 74 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 Local utility routine that creates a mapping from the global column 80 number to the local number in the off-diagonal part of the local 81 storage of the matrix. This is done in a non scalable way since the 82 length of colmap equals the global matrix length. 83 */ 84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 85 { 86 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 87 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 88 PetscErrorCode ierr; 89 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 90 91 PetscFunctionBegin; 92 #if defined(PETSC_USE_CTABLE) 93 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 94 for (i=0; i<nbs; i++) { 95 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 96 } 97 #else 98 ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 99 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 100 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 101 #endif 102 PetscFunctionReturn(0); 103 } 104 105 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 106 { \ 107 brow = row/bs; \ 108 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109 rmax = aimax[brow]; nrow = ailen[brow]; \ 110 bcol = col/bs; \ 111 ridx = row % bs; cidx = col % bs; \ 112 low = 0; high = nrow; \ 113 while (high-low > 3) { \ 114 t = (low+high)/2; \ 115 if (rp[t] > bcol) high = t; \ 116 else low = t; \ 117 } \ 118 for (_i=low; _i<high; _i++) { \ 119 if (rp[_i] > bcol) break; \ 120 if (rp[_i] == bcol) { \ 121 bap = ap + bs2*_i + bs*cidx + ridx; \ 122 if (addv == ADD_VALUES) *bap += value; \ 123 else *bap = value; \ 124 goto a_noinsert; \ 125 } \ 126 } \ 127 if (a->nonew == 1) goto a_noinsert; \ 128 PetscCheckFalse(a->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 129 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 130 N = nrow++ - 1; \ 131 /* shift up all the later entries in this row */ \ 132 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 133 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 134 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 135 rp[_i] = bcol; \ 136 ap[bs2*_i + bs*cidx + ridx] = value; \ 137 a_noinsert:; \ 138 ailen[brow] = nrow; \ 139 } 140 141 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 142 { \ 143 brow = row/bs; \ 144 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 145 rmax = bimax[brow]; nrow = bilen[brow]; \ 146 bcol = col/bs; \ 147 ridx = row % bs; cidx = col % bs; \ 148 low = 0; high = nrow; \ 149 while (high-low > 3) { \ 150 t = (low+high)/2; \ 151 if (rp[t] > bcol) high = t; \ 152 else low = t; \ 153 } \ 154 for (_i=low; _i<high; _i++) { \ 155 if (rp[_i] > bcol) break; \ 156 if (rp[_i] == bcol) { \ 157 bap = ap + bs2*_i + bs*cidx + ridx; \ 158 if (addv == ADD_VALUES) *bap += value; \ 159 else *bap = value; \ 160 goto b_noinsert; \ 161 } \ 162 } \ 163 if (b->nonew == 1) goto b_noinsert; \ 164 PetscCheckFalse(b->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 165 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 169 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 170 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 171 rp[_i] = bcol; \ 172 ap[bs2*_i + bs*cidx + ridx] = value; \ 173 b_noinsert:; \ 174 bilen[brow] = nrow; \ 175 } 176 177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 178 { 179 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 180 MatScalar value; 181 PetscBool roworiented = baij->roworiented; 182 PetscErrorCode ierr; 183 PetscInt i,j,row,col; 184 PetscInt rstart_orig=mat->rmap->rstart; 185 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 186 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 187 188 /* Some Variables required in the macro */ 189 Mat A = baij->A; 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 191 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 192 MatScalar *aa =a->a; 193 194 Mat B = baij->B; 195 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 196 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 197 MatScalar *ba =b->a; 198 199 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 200 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 201 MatScalar *ap,*bap; 202 203 PetscFunctionBegin; 204 for (i=0; i<m; i++) { 205 if (im[i] < 0) continue; 206 PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 207 if (im[i] >= rstart_orig && im[i] < rend_orig) { 208 row = im[i] - rstart_orig; 209 for (j=0; j<n; j++) { 210 if (in[j] >= cstart_orig && in[j] < cend_orig) { 211 col = in[j] - cstart_orig; 212 if (roworiented) value = v[i*n+j]; 213 else value = v[i+j*m]; 214 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 215 } else if (in[j] < 0) continue; 216 else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 217 else { 218 if (mat->was_assembled) { 219 if (!baij->colmap) { 220 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 221 } 222 #if defined(PETSC_USE_CTABLE) 223 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 224 col = col - 1; 225 #else 226 col = baij->colmap[in[j]/bs] - 1; 227 #endif 228 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 229 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 232 B = baij->B; 233 b = (Mat_SeqBAIJ*)(B)->data; 234 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 235 ba =b->a; 236 } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 237 else col += in[j]%bs; 238 } else col = in[j]; 239 if (roworiented) value = v[i*n+j]; 240 else value = v[i+j*m]; 241 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 242 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 243 } 244 } 245 } else { 246 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 247 if (!baij->donotstash) { 248 mat->assembled = PETSC_FALSE; 249 if (roworiented) { 250 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 251 } else { 252 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 PetscFunctionReturn(0); 258 } 259 260 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 261 { 262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 263 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 264 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 265 PetscErrorCode ierr; 266 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 267 PetscBool roworiented=a->roworiented; 268 const PetscScalar *value = v; 269 MatScalar *ap,*aa = a->a,*bap; 270 271 PetscFunctionBegin; 272 rp = aj + ai[row]; 273 ap = aa + bs2*ai[row]; 274 rmax = imax[row]; 275 nrow = ailen[row]; 276 value = v; 277 low = 0; 278 high = nrow; 279 while (high-low > 7) { 280 t = (low+high)/2; 281 if (rp[t] > col) high = t; 282 else low = t; 283 } 284 for (i=low; i<high; i++) { 285 if (rp[i] > col) break; 286 if (rp[i] == col) { 287 bap = ap + bs2*i; 288 if (roworiented) { 289 if (is == ADD_VALUES) { 290 for (ii=0; ii<bs; ii++) { 291 for (jj=ii; jj<bs2; jj+=bs) { 292 bap[jj] += *value++; 293 } 294 } 295 } else { 296 for (ii=0; ii<bs; ii++) { 297 for (jj=ii; jj<bs2; jj+=bs) { 298 bap[jj] = *value++; 299 } 300 } 301 } 302 } else { 303 if (is == ADD_VALUES) { 304 for (ii=0; ii<bs; ii++,value+=bs) { 305 for (jj=0; jj<bs; jj++) { 306 bap[jj] += value[jj]; 307 } 308 bap += bs; 309 } 310 } else { 311 for (ii=0; ii<bs; ii++,value+=bs) { 312 for (jj=0; jj<bs; jj++) { 313 bap[jj] = value[jj]; 314 } 315 bap += bs; 316 } 317 } 318 } 319 goto noinsert2; 320 } 321 } 322 if (nonew == 1) goto noinsert2; 323 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 324 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 325 N = nrow++ - 1; high++; 326 /* shift up all the later entries in this row */ 327 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 328 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 329 rp[i] = col; 330 bap = ap + bs2*i; 331 if (roworiented) { 332 for (ii=0; ii<bs; ii++) { 333 for (jj=ii; jj<bs2; jj+=bs) { 334 bap[jj] = *value++; 335 } 336 } 337 } else { 338 for (ii=0; ii<bs; ii++) { 339 for (jj=0; jj<bs; jj++) { 340 *bap++ = *value++; 341 } 342 } 343 } 344 noinsert2:; 345 ailen[row] = nrow; 346 PetscFunctionReturn(0); 347 } 348 349 /* 350 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 351 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 352 */ 353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 354 { 355 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 356 const PetscScalar *value; 357 MatScalar *barray = baij->barray; 358 PetscBool roworiented = baij->roworiented; 359 PetscErrorCode ierr; 360 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 361 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 362 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 363 364 PetscFunctionBegin; 365 if (!barray) { 366 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 367 baij->barray = barray; 368 } 369 370 if (roworiented) stepval = (n-1)*bs; 371 else stepval = (m-1)*bs; 372 373 for (i=0; i<m; i++) { 374 if (im[i] < 0) continue; 375 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 376 if (im[i] >= rstart && im[i] < rend) { 377 row = im[i] - rstart; 378 for (j=0; j<n; j++) { 379 /* If NumCol = 1 then a copy is not required */ 380 if ((roworiented) && (n == 1)) { 381 barray = (MatScalar*)v + i*bs2; 382 } else if ((!roworiented) && (m == 1)) { 383 barray = (MatScalar*)v + j*bs2; 384 } else { /* Here a copy is required */ 385 if (roworiented) { 386 value = v + (i*(stepval+bs) + j)*bs; 387 } else { 388 value = v + (j*(stepval+bs) + i)*bs; 389 } 390 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 391 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 392 barray += bs; 393 } 394 barray -= bs2; 395 } 396 397 if (in[j] >= cstart && in[j] < cend) { 398 col = in[j] - cstart; 399 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 400 } else if (in[j] < 0) continue; 401 else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 402 else { 403 if (mat->was_assembled) { 404 if (!baij->colmap) { 405 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 406 } 407 408 #if defined(PETSC_USE_DEBUG) 409 #if defined(PETSC_USE_CTABLE) 410 { PetscInt data; 411 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 412 PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 } 414 #else 415 PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 416 #endif 417 #endif 418 #if defined(PETSC_USE_CTABLE) 419 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 420 col = (col - 1)/bs; 421 #else 422 col = (baij->colmap[in[j]] - 1)/bs; 423 #endif 424 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 425 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 426 col = in[j]; 427 } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 428 } else col = in[j]; 429 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 430 } 431 } 432 } else { 433 PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 434 if (!baij->donotstash) { 435 if (roworiented) { 436 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 437 } else { 438 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 439 } 440 } 441 } 442 } 443 PetscFunctionReturn(0); 444 } 445 446 #define HASH_KEY 0.6180339887 447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 451 { 452 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 453 PetscBool roworiented = baij->roworiented; 454 PetscErrorCode ierr; 455 PetscInt i,j,row,col; 456 PetscInt rstart_orig=mat->rmap->rstart; 457 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 458 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 459 PetscReal tmp; 460 MatScalar **HD = baij->hd,value; 461 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 462 463 PetscFunctionBegin; 464 for (i=0; i<m; i++) { 465 if (PetscDefined(USE_DEBUG)) { 466 PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 467 PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 468 } 469 row = im[i]; 470 if (row >= rstart_orig && row < rend_orig) { 471 for (j=0; j<n; j++) { 472 col = in[j]; 473 if (roworiented) value = v[i*n+j]; 474 else value = v[i+j*m]; 475 /* Look up PetscInto the Hash Table */ 476 key = (row/bs)*Nbs+(col/bs)+1; 477 h1 = HASH(size,key,tmp); 478 479 idx = h1; 480 if (PetscDefined(USE_DEBUG)) { 481 insert_ct++; 482 total_ct++; 483 if (HT[idx] != key) { 484 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 485 if (idx == size) { 486 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 487 PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 488 } 489 } 490 } else if (HT[idx] != key) { 491 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 492 if (idx == size) { 493 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 494 PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 495 } 496 } 497 /* A HASH table entry is found, so insert the values at the correct address */ 498 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 499 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 500 } 501 } else if (!baij->donotstash) { 502 if (roworiented) { 503 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 504 } else { 505 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 506 } 507 } 508 } 509 if (PetscDefined(USE_DEBUG)) { 510 baij->ht_total_ct += total_ct; 511 baij->ht_insert_ct += insert_ct; 512 } 513 PetscFunctionReturn(0); 514 } 515 516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 517 { 518 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 519 PetscBool roworiented = baij->roworiented; 520 PetscErrorCode ierr; 521 PetscInt i,j,ii,jj,row,col; 522 PetscInt rstart=baij->rstartbs; 523 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 524 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 525 PetscReal tmp; 526 MatScalar **HD = baij->hd,*baij_a; 527 const PetscScalar *v_t,*value; 528 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 529 530 PetscFunctionBegin; 531 if (roworiented) stepval = (n-1)*bs; 532 else stepval = (m-1)*bs; 533 534 for (i=0; i<m; i++) { 535 if (PetscDefined(USE_DEBUG)) { 536 PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 537 PetscCheckFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 538 } 539 row = im[i]; 540 v_t = v + i*nbs2; 541 if (row >= rstart && row < rend) { 542 for (j=0; j<n; j++) { 543 col = in[j]; 544 545 /* Look up into the Hash Table */ 546 key = row*Nbs+col+1; 547 h1 = HASH(size,key,tmp); 548 549 idx = h1; 550 if (PetscDefined(USE_DEBUG)) { 551 total_ct++; 552 insert_ct++; 553 if (HT[idx] != key) { 554 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 555 if (idx == size) { 556 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 557 PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 558 } 559 } 560 } else if (HT[idx] != key) { 561 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 562 if (idx == size) { 563 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 564 PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 565 } 566 } 567 baij_a = HD[idx]; 568 if (roworiented) { 569 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 570 /* value = v + (i*(stepval+bs)+j)*bs; */ 571 value = v_t; 572 v_t += bs; 573 if (addv == ADD_VALUES) { 574 for (ii=0; ii<bs; ii++,value+=stepval) { 575 for (jj=ii; jj<bs2; jj+=bs) { 576 baij_a[jj] += *value++; 577 } 578 } 579 } else { 580 for (ii=0; ii<bs; ii++,value+=stepval) { 581 for (jj=ii; jj<bs2; jj+=bs) { 582 baij_a[jj] = *value++; 583 } 584 } 585 } 586 } else { 587 value = v + j*(stepval+bs)*bs + i*bs; 588 if (addv == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 590 for (jj=0; jj<bs; jj++) { 591 baij_a[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 596 for (jj=0; jj<bs; jj++) { 597 baij_a[jj] = *value++; 598 } 599 } 600 } 601 } 602 } 603 } else { 604 if (!baij->donotstash) { 605 if (roworiented) { 606 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 607 } else { 608 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 609 } 610 } 611 } 612 } 613 if (PetscDefined(USE_DEBUG)) { 614 baij->ht_total_ct += total_ct; 615 baij->ht_insert_ct += insert_ct; 616 } 617 PetscFunctionReturn(0); 618 } 619 620 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 621 { 622 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 623 PetscErrorCode ierr; 624 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 625 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 626 627 PetscFunctionBegin; 628 for (i=0; i<m; i++) { 629 if (idxm[i] < 0) continue; /* negative row */ 630 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 631 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 632 row = idxm[i] - bsrstart; 633 for (j=0; j<n; j++) { 634 if (idxn[j] < 0) continue; /* negative column */ 635 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 636 if (idxn[j] >= bscstart && idxn[j] < bscend) { 637 col = idxn[j] - bscstart; 638 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 639 } else { 640 if (!baij->colmap) { 641 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 642 } 643 #if defined(PETSC_USE_CTABLE) 644 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 645 data--; 646 #else 647 data = baij->colmap[idxn[j]/bs]-1; 648 #endif 649 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 650 else { 651 col = data + idxn[j]%bs; 652 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 653 } 654 } 655 } 656 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 657 } 658 PetscFunctionReturn(0); 659 } 660 661 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 662 { 663 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 665 PetscErrorCode ierr; 666 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 667 PetscReal sum = 0.0; 668 MatScalar *v; 669 670 PetscFunctionBegin; 671 if (baij->size == 1) { 672 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 673 } else { 674 if (type == NORM_FROBENIUS) { 675 v = amat->a; 676 nz = amat->nz*bs2; 677 for (i=0; i<nz; i++) { 678 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 679 } 680 v = bmat->a; 681 nz = bmat->nz*bs2; 682 for (i=0; i<nz; i++) { 683 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 684 } 685 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 686 *nrm = PetscSqrtReal(*nrm); 687 } else if (type == NORM_1) { /* max column sum */ 688 PetscReal *tmp,*tmp2; 689 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 690 ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 691 ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 692 v = amat->a; jj = amat->j; 693 for (i=0; i<amat->nz; i++) { 694 for (j=0; j<bs; j++) { 695 col = bs*(cstart + *jj) + j; /* column index */ 696 for (row=0; row<bs; row++) { 697 tmp[col] += PetscAbsScalar(*v); v++; 698 } 699 } 700 jj++; 701 } 702 v = bmat->a; jj = bmat->j; 703 for (i=0; i<bmat->nz; i++) { 704 for (j=0; j<bs; j++) { 705 col = bs*garray[*jj] + j; 706 for (row=0; row<bs; row++) { 707 tmp[col] += PetscAbsScalar(*v); v++; 708 } 709 } 710 jj++; 711 } 712 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 713 *nrm = 0.0; 714 for (j=0; j<mat->cmap->N; j++) { 715 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 716 } 717 ierr = PetscFree(tmp);CHKERRQ(ierr); 718 ierr = PetscFree(tmp2);CHKERRQ(ierr); 719 } else if (type == NORM_INFINITY) { /* max row sum */ 720 PetscReal *sums; 721 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 722 sum = 0.0; 723 for (j=0; j<amat->mbs; j++) { 724 for (row=0; row<bs; row++) sums[row] = 0.0; 725 v = amat->a + bs2*amat->i[j]; 726 nz = amat->i[j+1]-amat->i[j]; 727 for (i=0; i<nz; i++) { 728 for (col=0; col<bs; col++) { 729 for (row=0; row<bs; row++) { 730 sums[row] += PetscAbsScalar(*v); v++; 731 } 732 } 733 } 734 v = bmat->a + bs2*bmat->i[j]; 735 nz = bmat->i[j+1]-bmat->i[j]; 736 for (i=0; i<nz; i++) { 737 for (col=0; col<bs; col++) { 738 for (row=0; row<bs; row++) { 739 sums[row] += PetscAbsScalar(*v); v++; 740 } 741 } 742 } 743 for (row=0; row<bs; row++) { 744 if (sums[row] > sum) sum = sums[row]; 745 } 746 } 747 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 748 ierr = PetscFree(sums);CHKERRQ(ierr); 749 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 750 } 751 PetscFunctionReturn(0); 752 } 753 754 /* 755 Creates the hash table, and sets the table 756 This table is created only once. 757 If new entried need to be added to the matrix 758 then the hash table has to be destroyed and 759 recreated. 760 */ 761 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 762 { 763 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 764 Mat A = baij->A,B=baij->B; 765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 766 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 767 PetscErrorCode ierr; 768 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 769 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 770 PetscInt *HT,key; 771 MatScalar **HD; 772 PetscReal tmp; 773 #if defined(PETSC_USE_INFO) 774 PetscInt ct=0,max=0; 775 #endif 776 777 PetscFunctionBegin; 778 if (baij->ht) PetscFunctionReturn(0); 779 780 baij->ht_size = (PetscInt)(factor*nz); 781 ht_size = baij->ht_size; 782 783 /* Allocate Memory for Hash Table */ 784 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 785 HD = baij->hd; 786 HT = baij->ht; 787 788 /* Loop Over A */ 789 for (i=0; i<a->mbs; i++) { 790 for (j=ai[i]; j<ai[i+1]; j++) { 791 row = i+rstart; 792 col = aj[j]+cstart; 793 794 key = row*Nbs + col + 1; 795 h1 = HASH(ht_size,key,tmp); 796 for (k=0; k<ht_size; k++) { 797 if (!HT[(h1+k)%ht_size]) { 798 HT[(h1+k)%ht_size] = key; 799 HD[(h1+k)%ht_size] = a->a + j*bs2; 800 break; 801 #if defined(PETSC_USE_INFO) 802 } else { 803 ct++; 804 #endif 805 } 806 } 807 #if defined(PETSC_USE_INFO) 808 if (k> max) max = k; 809 #endif 810 } 811 } 812 /* Loop Over B */ 813 for (i=0; i<b->mbs; i++) { 814 for (j=bi[i]; j<bi[i+1]; j++) { 815 row = i+rstart; 816 col = garray[bj[j]]; 817 key = row*Nbs + col + 1; 818 h1 = HASH(ht_size,key,tmp); 819 for (k=0; k<ht_size; k++) { 820 if (!HT[(h1+k)%ht_size]) { 821 HT[(h1+k)%ht_size] = key; 822 HD[(h1+k)%ht_size] = b->a + j*bs2; 823 break; 824 #if defined(PETSC_USE_INFO) 825 } else { 826 ct++; 827 #endif 828 } 829 } 830 #if defined(PETSC_USE_INFO) 831 if (k> max) max = k; 832 #endif 833 } 834 } 835 836 /* Print Summary */ 837 #if defined(PETSC_USE_INFO) 838 for (i=0,j=0; i<ht_size; i++) { 839 if (HT[i]) j++; 840 } 841 ierr = PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max);CHKERRQ(ierr); 842 #endif 843 PetscFunctionReturn(0); 844 } 845 846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847 { 848 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849 PetscErrorCode ierr; 850 PetscInt nstash,reallocs; 851 852 PetscFunctionBegin; 853 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 854 855 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 856 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 857 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 858 ierr = PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 859 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 860 ierr = PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 865 { 866 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 867 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 868 PetscErrorCode ierr; 869 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 870 PetscInt *row,*col; 871 PetscBool r1,r2,r3,other_disassembled; 872 MatScalar *val; 873 PetscMPIInt n; 874 875 PetscFunctionBegin; 876 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 877 if (!baij->donotstash && !mat->nooffprocentries) { 878 while (1) { 879 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 880 if (!flg) break; 881 882 for (i=0; i<n;) { 883 /* Now identify the consecutive vals belonging to the same row */ 884 for (j=i,rstart=row[j]; j<n; j++) { 885 if (row[j] != rstart) break; 886 } 887 if (j < n) ncols = j-i; 888 else ncols = n-i; 889 /* Now assemble all these values with a single function call */ 890 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 891 i = j; 892 } 893 } 894 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 895 /* Now process the block-stash. Since the values are stashed column-oriented, 896 set the roworiented flag to column oriented, and after MatSetValues() 897 restore the original flags */ 898 r1 = baij->roworiented; 899 r2 = a->roworiented; 900 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 901 902 baij->roworiented = PETSC_FALSE; 903 a->roworiented = PETSC_FALSE; 904 905 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 906 while (1) { 907 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 908 if (!flg) break; 909 910 for (i=0; i<n;) { 911 /* Now identify the consecutive vals belonging to the same row */ 912 for (j=i,rstart=row[j]; j<n; j++) { 913 if (row[j] != rstart) break; 914 } 915 if (j < n) ncols = j-i; 916 else ncols = n-i; 917 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 918 i = j; 919 } 920 } 921 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 922 923 baij->roworiented = r1; 924 a->roworiented = r2; 925 926 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 927 } 928 929 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 930 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 931 932 /* determine if any processor has disassembled, if so we must 933 also disassemble ourselfs, in order that we may reassemble. */ 934 /* 935 if nonzero structure of submatrix B cannot change then we know that 936 no processor disassembled thus we can skip this stuff 937 */ 938 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 939 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 940 if (mat->was_assembled && !other_disassembled) { 941 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 942 } 943 } 944 945 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 946 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 947 } 948 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 949 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 950 951 #if defined(PETSC_USE_INFO) 952 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 953 ierr = PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 954 955 baij->ht_total_ct = 0; 956 baij->ht_insert_ct = 0; 957 } 958 #endif 959 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 961 962 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 963 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 964 } 965 966 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 967 968 baij->rowvalues = NULL; 969 970 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 971 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 972 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 973 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 974 } 975 PetscFunctionReturn(0); 976 } 977 978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 979 #include <petscdraw.h> 980 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 981 { 982 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 983 PetscErrorCode ierr; 984 PetscMPIInt rank = baij->rank; 985 PetscInt bs = mat->rmap->bs; 986 PetscBool iascii,isdraw; 987 PetscViewer sviewer; 988 PetscViewerFormat format; 989 990 PetscFunctionBegin; 991 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 992 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 993 if (iascii) { 994 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 995 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 996 MatInfo info; 997 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 998 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 999 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1000 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 1001 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1002 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1003 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1004 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1005 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1006 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1007 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1008 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1009 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1012 ierr = PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1015 PetscFunctionReturn(0); 1016 } 1017 } 1018 1019 if (isdraw) { 1020 PetscDraw draw; 1021 PetscBool isnull; 1022 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1023 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1024 if (isnull) PetscFunctionReturn(0); 1025 } 1026 1027 { 1028 /* assemble the entire matrix onto first processor. */ 1029 Mat A; 1030 Mat_SeqBAIJ *Aloc; 1031 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1032 MatScalar *a; 1033 const char *matname; 1034 1035 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1036 /* Perhaps this should be the type of mat? */ 1037 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1038 if (rank == 0) { 1039 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1040 } else { 1041 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1042 } 1043 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1044 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1045 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1046 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1047 1048 /* copy over the A part */ 1049 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1050 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1051 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1052 1053 for (i=0; i<mbs; i++) { 1054 rvals[0] = bs*(baij->rstartbs + i); 1055 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1056 for (j=ai[i]; j<ai[i+1]; j++) { 1057 col = (baij->cstartbs+aj[j])*bs; 1058 for (k=0; k<bs; k++) { 1059 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1060 col++; a += bs; 1061 } 1062 } 1063 } 1064 /* copy over the B part */ 1065 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1066 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1067 for (i=0; i<mbs; i++) { 1068 rvals[0] = bs*(baij->rstartbs + i); 1069 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1070 for (j=ai[i]; j<ai[i+1]; j++) { 1071 col = baij->garray[aj[j]]*bs; 1072 for (k=0; k<bs; k++) { 1073 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1074 col++; a += bs; 1075 } 1076 } 1077 } 1078 ierr = PetscFree(rvals);CHKERRQ(ierr); 1079 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1080 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1081 /* 1082 Everyone has to call to draw the matrix since the graphics waits are 1083 synchronized across all processors that share the PetscDraw object 1084 */ 1085 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1086 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1087 if (rank == 0) { 1088 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1089 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1090 } 1091 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1093 ierr = MatDestroy(&A);CHKERRQ(ierr); 1094 } 1095 PetscFunctionReturn(0); 1096 } 1097 1098 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1099 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1100 { 1101 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1102 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1103 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1104 const PetscInt *garray = aij->garray; 1105 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1106 PetscInt *rowlens,*colidxs; 1107 PetscScalar *matvals; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1112 1113 M = mat->rmap->N; 1114 N = mat->cmap->N; 1115 m = mat->rmap->n; 1116 rs = mat->rmap->rstart; 1117 cs = mat->cmap->rstart; 1118 bs = mat->rmap->bs; 1119 nz = bs*bs*(A->nz + B->nz); 1120 1121 /* write matrix header */ 1122 header[0] = MAT_FILE_CLASSID; 1123 header[1] = M; header[2] = N; header[3] = nz; 1124 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1125 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1126 1127 /* fill in and store row lengths */ 1128 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1129 for (cnt=0, i=0; i<A->mbs; i++) 1130 for (j=0; j<bs; j++) 1131 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1132 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1133 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1134 1135 /* fill in and store column indices */ 1136 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1137 for (cnt=0, i=0; i<A->mbs; i++) { 1138 for (k=0; k<bs; k++) { 1139 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1140 if (garray[B->j[jb]] > cs/bs) break; 1141 for (l=0; l<bs; l++) 1142 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1143 } 1144 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1145 for (l=0; l<bs; l++) 1146 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1147 for (; jb<B->i[i+1]; jb++) 1148 for (l=0; l<bs; l++) 1149 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1150 } 1151 } 1152 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1153 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1154 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1155 1156 /* fill in and store nonzero values */ 1157 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1158 for (cnt=0, i=0; i<A->mbs; i++) { 1159 for (k=0; k<bs; k++) { 1160 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1161 if (garray[B->j[jb]] > cs/bs) break; 1162 for (l=0; l<bs; l++) 1163 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1164 } 1165 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1166 for (l=0; l<bs; l++) 1167 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1168 for (; jb<B->i[i+1]; jb++) 1169 for (l=0; l<bs; l++) 1170 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1171 } 1172 } 1173 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1174 ierr = PetscFree(matvals);CHKERRQ(ierr); 1175 1176 /* write block size option to the viewer's .info file */ 1177 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1182 { 1183 PetscErrorCode ierr; 1184 PetscBool iascii,isdraw,issocket,isbinary; 1185 1186 PetscFunctionBegin; 1187 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1188 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1189 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1190 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1191 if (iascii || isdraw || issocket) { 1192 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1193 } else if (isbinary) { 1194 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1200 { 1201 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 #if defined(PETSC_USE_LOG) 1206 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1207 #endif 1208 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1209 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1210 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1211 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1212 #if defined(PETSC_USE_CTABLE) 1213 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1214 #else 1215 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1216 #endif 1217 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1218 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1219 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1220 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1221 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1222 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1223 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1224 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1225 1226 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1235 #if defined(PETSC_HAVE_HYPRE) 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 1237 #endif 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1243 { 1244 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1245 PetscErrorCode ierr; 1246 PetscInt nt; 1247 1248 PetscFunctionBegin; 1249 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1250 PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1251 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1252 PetscCheckFalse(nt != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1253 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1261 { 1262 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1274 { 1275 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 /* do nondiagonal part */ 1280 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281 /* do local part */ 1282 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1283 /* add partial results together */ 1284 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1290 { 1291 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1292 PetscErrorCode ierr; 1293 1294 PetscFunctionBegin; 1295 /* do nondiagonal part */ 1296 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1297 /* do local part */ 1298 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1299 /* add partial results together */ 1300 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 /* 1306 This only works correctly for square matrices where the subblock A->A is the 1307 diagonal block 1308 */ 1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1310 { 1311 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312 PetscErrorCode ierr; 1313 1314 PetscFunctionBegin; 1315 PetscCheckFalse(A->rmap->N != A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1316 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1321 { 1322 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 1325 PetscFunctionBegin; 1326 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1327 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332 { 1333 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1334 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1335 PetscErrorCode ierr; 1336 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1337 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1338 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1339 1340 PetscFunctionBegin; 1341 PetscCheckFalse(row < brstart || row >= brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1342 PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1343 mat->getrowactive = PETSC_TRUE; 1344 1345 if (!mat->rowvalues && (idx || v)) { 1346 /* 1347 allocate enough space to hold information from the longest row. 1348 */ 1349 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1350 PetscInt max = 1,mbs = mat->mbs,tmp; 1351 for (i=0; i<mbs; i++) { 1352 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1353 if (max < tmp) max = tmp; 1354 } 1355 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1356 } 1357 lrow = row - brstart; 1358 1359 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1360 if (!v) {pvA = NULL; pvB = NULL;} 1361 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1362 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1363 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1364 nztot = nzA + nzB; 1365 1366 cmap = mat->garray; 1367 if (v || idx) { 1368 if (nztot) { 1369 /* Sort by increasing column numbers, assuming A and B already sorted */ 1370 PetscInt imark = -1; 1371 if (v) { 1372 *v = v_p = mat->rowvalues; 1373 for (i=0; i<nzB; i++) { 1374 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1375 else break; 1376 } 1377 imark = i; 1378 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1379 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1380 } 1381 if (idx) { 1382 *idx = idx_p = mat->rowindices; 1383 if (imark > -1) { 1384 for (i=0; i<imark; i++) { 1385 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1386 } 1387 } else { 1388 for (i=0; i<nzB; i++) { 1389 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1390 else break; 1391 } 1392 imark = i; 1393 } 1394 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1395 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1396 } 1397 } else { 1398 if (idx) *idx = NULL; 1399 if (v) *v = NULL; 1400 } 1401 } 1402 *nz = nztot; 1403 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1404 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1409 { 1410 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1411 1412 PetscFunctionBegin; 1413 PetscCheckFalse(!baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1414 baij->getrowactive = PETSC_FALSE; 1415 PetscFunctionReturn(0); 1416 } 1417 1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1419 { 1420 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1425 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1430 { 1431 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1432 Mat A = a->A,B = a->B; 1433 PetscErrorCode ierr; 1434 PetscLogDouble isend[5],irecv[5]; 1435 1436 PetscFunctionBegin; 1437 info->block_size = (PetscReal)matin->rmap->bs; 1438 1439 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1440 1441 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1442 isend[3] = info->memory; isend[4] = info->mallocs; 1443 1444 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1445 1446 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1447 isend[3] += info->memory; isend[4] += info->mallocs; 1448 1449 if (flag == MAT_LOCAL) { 1450 info->nz_used = isend[0]; 1451 info->nz_allocated = isend[1]; 1452 info->nz_unneeded = isend[2]; 1453 info->memory = isend[3]; 1454 info->mallocs = isend[4]; 1455 } else if (flag == MAT_GLOBAL_MAX) { 1456 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1457 1458 info->nz_used = irecv[0]; 1459 info->nz_allocated = irecv[1]; 1460 info->nz_unneeded = irecv[2]; 1461 info->memory = irecv[3]; 1462 info->mallocs = irecv[4]; 1463 } else if (flag == MAT_GLOBAL_SUM) { 1464 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 1465 1466 info->nz_used = irecv[0]; 1467 info->nz_allocated = irecv[1]; 1468 info->nz_unneeded = irecv[2]; 1469 info->memory = irecv[3]; 1470 info->mallocs = irecv[4]; 1471 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1472 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1473 info->fill_ratio_needed = 0; 1474 info->factor_mallocs = 0; 1475 PetscFunctionReturn(0); 1476 } 1477 1478 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1479 { 1480 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 switch (op) { 1485 case MAT_NEW_NONZERO_LOCATIONS: 1486 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1487 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1488 case MAT_KEEP_NONZERO_PATTERN: 1489 case MAT_NEW_NONZERO_LOCATION_ERR: 1490 MatCheckPreallocated(A,1); 1491 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1492 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1493 break; 1494 case MAT_ROW_ORIENTED: 1495 MatCheckPreallocated(A,1); 1496 a->roworiented = flg; 1497 1498 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1499 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1500 break; 1501 case MAT_FORCE_DIAGONAL_ENTRIES: 1502 case MAT_SORTED_FULL: 1503 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1504 break; 1505 case MAT_IGNORE_OFF_PROC_ENTRIES: 1506 a->donotstash = flg; 1507 break; 1508 case MAT_USE_HASH_TABLE: 1509 a->ht_flag = flg; 1510 a->ht_fact = 1.39; 1511 break; 1512 case MAT_SYMMETRIC: 1513 case MAT_STRUCTURALLY_SYMMETRIC: 1514 case MAT_HERMITIAN: 1515 case MAT_SUBMAT_SINGLEIS: 1516 case MAT_SYMMETRY_ETERNAL: 1517 MatCheckPreallocated(A,1); 1518 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1519 break; 1520 default: 1521 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1527 { 1528 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1529 Mat_SeqBAIJ *Aloc; 1530 Mat B; 1531 PetscErrorCode ierr; 1532 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1533 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1534 MatScalar *a; 1535 1536 PetscFunctionBegin; 1537 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1538 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1539 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1540 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1541 /* Do not know preallocation information, but must set block size */ 1542 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1543 } else { 1544 B = *matout; 1545 } 1546 1547 /* copy over the A part */ 1548 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1549 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1551 1552 for (i=0; i<mbs; i++) { 1553 rvals[0] = bs*(baij->rstartbs + i); 1554 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1555 for (j=ai[i]; j<ai[i+1]; j++) { 1556 col = (baij->cstartbs+aj[j])*bs; 1557 for (k=0; k<bs; k++) { 1558 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1559 1560 col++; a += bs; 1561 } 1562 } 1563 } 1564 /* copy over the B part */ 1565 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1566 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1567 for (i=0; i<mbs; i++) { 1568 rvals[0] = bs*(baij->rstartbs + i); 1569 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1570 for (j=ai[i]; j<ai[i+1]; j++) { 1571 col = baij->garray[aj[j]]*bs; 1572 for (k=0; k<bs; k++) { 1573 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1574 col++; 1575 a += bs; 1576 } 1577 } 1578 } 1579 ierr = PetscFree(rvals);CHKERRQ(ierr); 1580 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1581 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 1583 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1584 else { 1585 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1591 { 1592 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1593 Mat a = baij->A,b = baij->B; 1594 PetscErrorCode ierr; 1595 PetscInt s1,s2,s3; 1596 1597 PetscFunctionBegin; 1598 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1599 if (rr) { 1600 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1601 PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1602 /* Overlap communication with computation. */ 1603 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 } 1605 if (ll) { 1606 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1607 PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1608 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1609 } 1610 /* scale the diagonal block */ 1611 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1612 1613 if (rr) { 1614 /* Do a scatter end and then right scale the off-diagonal block */ 1615 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1622 { 1623 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1624 PetscInt *lrows; 1625 PetscInt r, len; 1626 PetscBool cong; 1627 PetscErrorCode ierr; 1628 1629 PetscFunctionBegin; 1630 /* get locally owned rows */ 1631 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1632 /* fix right hand side if needed */ 1633 if (x && b) { 1634 const PetscScalar *xx; 1635 PetscScalar *bb; 1636 1637 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1638 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1639 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1640 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1641 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1642 } 1643 1644 /* actually zap the local rows */ 1645 /* 1646 Zero the required rows. If the "diagonal block" of the matrix 1647 is square and the user wishes to set the diagonal we use separate 1648 code so that MatSetValues() is not called for each diagonal allocating 1649 new memory, thus calling lots of mallocs and slowing things down. 1650 1651 */ 1652 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1653 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1654 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1655 if ((diag != 0.0) && cong) { 1656 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1657 } else if (diag != 0.0) { 1658 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1659 PetscCheckFalse(((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1660 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1661 for (r = 0; r < len; ++r) { 1662 const PetscInt row = lrows[r] + A->rmap->rstart; 1663 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667 } else { 1668 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1669 } 1670 ierr = PetscFree(lrows);CHKERRQ(ierr); 1671 1672 /* only change matrix nonzero state if pattern was allowed to be changed */ 1673 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1674 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1675 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1681 { 1682 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1683 PetscErrorCode ierr; 1684 PetscMPIInt n = A->rmap->n,p = 0; 1685 PetscInt i,j,k,r,len = 0,row,col,count; 1686 PetscInt *lrows,*owners = A->rmap->range; 1687 PetscSFNode *rrows; 1688 PetscSF sf; 1689 const PetscScalar *xx; 1690 PetscScalar *bb,*mask; 1691 Vec xmask,lmask; 1692 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1693 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1694 PetscScalar *aa; 1695 1696 PetscFunctionBegin; 1697 /* Create SF where leaves are input rows and roots are owned rows */ 1698 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1699 for (r = 0; r < n; ++r) lrows[r] = -1; 1700 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1701 for (r = 0; r < N; ++r) { 1702 const PetscInt idx = rows[r]; 1703 PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1704 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1705 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1706 } 1707 rrows[r].rank = p; 1708 rrows[r].index = rows[r] - owners[p]; 1709 } 1710 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1711 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1712 /* Collect flags for rows to be zeroed */ 1713 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1714 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1715 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1716 /* Compress and put in row numbers */ 1717 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1718 /* zero diagonal part of matrix */ 1719 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1720 /* handle off diagonal part of matrix */ 1721 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1722 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1723 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1724 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1725 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1726 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1728 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1729 if (x) { 1730 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1731 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1732 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1733 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1734 } 1735 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1736 /* remove zeroed rows of off diagonal matrix */ 1737 for (i = 0; i < len; ++i) { 1738 row = lrows[i]; 1739 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1740 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1741 for (k = 0; k < count; ++k) { 1742 aa[0] = 0.0; 1743 aa += bs; 1744 } 1745 } 1746 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1747 for (i = 0; i < l->B->rmap->N; ++i) { 1748 row = i/bs; 1749 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1750 for (k = 0; k < bs; ++k) { 1751 col = bs*baij->j[j] + k; 1752 if (PetscAbsScalar(mask[col])) { 1753 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1754 if (x) bb[i] -= aa[0]*xx[col]; 1755 aa[0] = 0.0; 1756 } 1757 } 1758 } 1759 } 1760 if (x) { 1761 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1762 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1763 } 1764 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1765 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1766 ierr = PetscFree(lrows);CHKERRQ(ierr); 1767 1768 /* only change matrix nonzero state if pattern was allowed to be changed */ 1769 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1770 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1771 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1772 } 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1777 { 1778 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1779 PetscErrorCode ierr; 1780 1781 PetscFunctionBegin; 1782 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1787 1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1789 { 1790 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1791 Mat a,b,c,d; 1792 PetscBool flg; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 a = matA->A; b = matA->B; 1797 c = matB->A; d = matB->B; 1798 1799 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1800 if (flg) { 1801 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1802 } 1803 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1808 { 1809 PetscErrorCode ierr; 1810 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1811 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1812 1813 PetscFunctionBegin; 1814 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1815 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1816 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1817 } else { 1818 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1819 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1820 } 1821 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1826 { 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1835 { 1836 PetscErrorCode ierr; 1837 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1838 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1839 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1840 1841 PetscFunctionBegin; 1842 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1847 { 1848 PetscErrorCode ierr; 1849 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1850 PetscBLASInt bnz,one=1; 1851 Mat_SeqBAIJ *x,*y; 1852 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1853 1854 PetscFunctionBegin; 1855 if (str == SAME_NONZERO_PATTERN) { 1856 PetscScalar alpha = a; 1857 x = (Mat_SeqBAIJ*)xx->A->data; 1858 y = (Mat_SeqBAIJ*)yy->A->data; 1859 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1860 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1861 x = (Mat_SeqBAIJ*)xx->B->data; 1862 y = (Mat_SeqBAIJ*)yy->B->data; 1863 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1864 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1865 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1866 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1867 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1868 } else { 1869 Mat B; 1870 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1871 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1872 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1873 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1874 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1875 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1876 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1877 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1878 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1879 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1880 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1881 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1882 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1883 ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 1884 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1885 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1891 { 1892 #if defined(PETSC_USE_COMPLEX) 1893 PetscErrorCode ierr; 1894 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1895 1896 PetscFunctionBegin; 1897 ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr); 1898 ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr); 1899 #else 1900 PetscFunctionBegin; 1901 #endif 1902 PetscFunctionReturn(0); 1903 } 1904 1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1906 { 1907 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1912 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1917 { 1918 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1923 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1928 { 1929 PetscErrorCode ierr; 1930 IS iscol_local; 1931 PetscInt csize; 1932 1933 PetscFunctionBegin; 1934 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1935 if (call == MAT_REUSE_MATRIX) { 1936 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1937 PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1938 } else { 1939 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1940 } 1941 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1942 if (call == MAT_INITIAL_MATRIX) { 1943 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1944 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 /* 1950 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1951 in local and then by concatenating the local matrices the end result. 1952 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1953 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1954 */ 1955 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1956 { 1957 PetscErrorCode ierr; 1958 PetscMPIInt rank,size; 1959 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1960 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1961 Mat M,Mreuse; 1962 MatScalar *vwork,*aa; 1963 MPI_Comm comm; 1964 IS isrow_new, iscol_new; 1965 Mat_SeqBAIJ *aij; 1966 1967 PetscFunctionBegin; 1968 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1969 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 1970 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1971 /* The compression and expansion should be avoided. Doesn't point 1972 out errors, might change the indices, hence buggey */ 1973 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1974 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1975 1976 if (call == MAT_REUSE_MATRIX) { 1977 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1978 PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1979 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 1980 } else { 1981 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 1982 } 1983 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1984 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1985 /* 1986 m - number of local rows 1987 n - number of columns (same on all processors) 1988 rstart - first row in new global matrix generated 1989 */ 1990 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1991 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1992 m = m/bs; 1993 n = n/bs; 1994 1995 if (call == MAT_INITIAL_MATRIX) { 1996 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1997 ii = aij->i; 1998 jj = aij->j; 1999 2000 /* 2001 Determine the number of non-zeros in the diagonal and off-diagonal 2002 portions of the matrix in order to do correct preallocation 2003 */ 2004 2005 /* first get start and end of "diagonal" columns */ 2006 if (csize == PETSC_DECIDE) { 2007 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2008 if (mglobal == n*bs) { /* square matrix */ 2009 nlocal = m; 2010 } else { 2011 nlocal = n/size + ((n % size) > rank); 2012 } 2013 } else { 2014 nlocal = csize/bs; 2015 } 2016 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 2017 rstart = rend - nlocal; 2018 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 2019 2020 /* next, compute all the lengths */ 2021 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2022 for (i=0; i<m; i++) { 2023 jend = ii[i+1] - ii[i]; 2024 olen = 0; 2025 dlen = 0; 2026 for (j=0; j<jend; j++) { 2027 if (*jj < rstart || *jj >= rend) olen++; 2028 else dlen++; 2029 jj++; 2030 } 2031 olens[i] = olen; 2032 dlens[i] = dlen; 2033 } 2034 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2035 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2036 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2037 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2038 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2039 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2040 } else { 2041 PetscInt ml,nl; 2042 2043 M = *newmat; 2044 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2045 PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2046 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2047 /* 2048 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2049 rather than the slower MatSetValues(). 2050 */ 2051 M->was_assembled = PETSC_TRUE; 2052 M->assembled = PETSC_FALSE; 2053 } 2054 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2055 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2056 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2057 ii = aij->i; 2058 jj = aij->j; 2059 aa = aij->a; 2060 for (i=0; i<m; i++) { 2061 row = rstart/bs + i; 2062 nz = ii[i+1] - ii[i]; 2063 cwork = jj; jj += nz; 2064 vwork = aa; aa += nz*bs*bs; 2065 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2066 } 2067 2068 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 *newmat = M; 2071 2072 /* save submatrix used in processor for next request */ 2073 if (call == MAT_INITIAL_MATRIX) { 2074 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2075 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2081 { 2082 MPI_Comm comm,pcomm; 2083 PetscInt clocal_size,nrows; 2084 const PetscInt *rows; 2085 PetscMPIInt size; 2086 IS crowp,lcolp; 2087 PetscErrorCode ierr; 2088 2089 PetscFunctionBegin; 2090 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2091 /* make a collective version of 'rowp' */ 2092 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2093 if (pcomm==comm) { 2094 crowp = rowp; 2095 } else { 2096 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2097 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2098 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2099 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2100 } 2101 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2102 /* make a local version of 'colp' */ 2103 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr); 2105 if (size==1) { 2106 lcolp = colp; 2107 } else { 2108 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2109 } 2110 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2111 /* now we just get the submatrix */ 2112 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2113 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2114 /* clean up */ 2115 if (pcomm!=comm) { 2116 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2117 } 2118 if (size>1) { 2119 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2125 { 2126 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2127 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2128 2129 PetscFunctionBegin; 2130 if (nghosts) *nghosts = B->nbs; 2131 if (ghosts) *ghosts = baij->garray; 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2136 { 2137 Mat B; 2138 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2139 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2140 Mat_SeqAIJ *b; 2141 PetscErrorCode ierr; 2142 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2143 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2144 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2145 2146 PetscFunctionBegin; 2147 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 2148 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 2149 2150 /* ---------------------------------------------------------------- 2151 Tell every processor the number of nonzeros per row 2152 */ 2153 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2154 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2155 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2156 } 2157 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2158 displs = recvcounts + size; 2159 for (i=0; i<size; i++) { 2160 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2161 displs[i] = A->rmap->range[i]/bs; 2162 } 2163 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2164 /* --------------------------------------------------------------- 2165 Create the sequential matrix of the same type as the local block diagonal 2166 */ 2167 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2168 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2169 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2170 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2171 b = (Mat_SeqAIJ*)B->data; 2172 2173 /*-------------------------------------------------------------------- 2174 Copy my part of matrix column indices over 2175 */ 2176 sendcount = ad->nz + bd->nz; 2177 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2178 a_jsendbuf = ad->j; 2179 b_jsendbuf = bd->j; 2180 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2181 cnt = 0; 2182 for (i=0; i<n; i++) { 2183 2184 /* put in lower diagonal portion */ 2185 m = bd->i[i+1] - bd->i[i]; 2186 while (m > 0) { 2187 /* is it above diagonal (in bd (compressed) numbering) */ 2188 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2189 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2190 m--; 2191 } 2192 2193 /* put in diagonal portion */ 2194 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2195 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2196 } 2197 2198 /* put in upper diagonal portion */ 2199 while (m-- > 0) { 2200 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2201 } 2202 } 2203 PetscCheckFalse(cnt != sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2204 2205 /*-------------------------------------------------------------------- 2206 Gather all column indices to all processors 2207 */ 2208 for (i=0; i<size; i++) { 2209 recvcounts[i] = 0; 2210 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2211 recvcounts[i] += lens[j]; 2212 } 2213 } 2214 displs[0] = 0; 2215 for (i=1; i<size; i++) { 2216 displs[i] = displs[i-1] + recvcounts[i-1]; 2217 } 2218 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2219 /*-------------------------------------------------------------------- 2220 Assemble the matrix into useable form (note numerical values not yet set) 2221 */ 2222 /* set the b->ilen (length of each row) values */ 2223 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2224 /* set the b->i indices */ 2225 b->i[0] = 0; 2226 for (i=1; i<=A->rmap->N/bs; i++) { 2227 b->i[i] = b->i[i-1] + lens[i-1]; 2228 } 2229 ierr = PetscFree(lens);CHKERRQ(ierr); 2230 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2231 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2232 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2233 2234 if (A->symmetric) { 2235 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2236 } else if (A->hermitian) { 2237 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2238 } else if (A->structurally_symmetric) { 2239 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2240 } 2241 *newmat = B; 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2246 { 2247 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2248 PetscErrorCode ierr; 2249 Vec bb1 = NULL; 2250 2251 PetscFunctionBegin; 2252 if (flag == SOR_APPLY_UPPER) { 2253 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2258 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2259 } 2260 2261 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2262 if (flag & SOR_ZERO_INITIAL_GUESS) { 2263 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2264 its--; 2265 } 2266 2267 while (its--) { 2268 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2269 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2270 2271 /* update rhs: bb1 = bb - B*x */ 2272 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2273 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2274 2275 /* local sweep */ 2276 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2277 } 2278 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2279 if (flag & SOR_ZERO_INITIAL_GUESS) { 2280 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2281 its--; 2282 } 2283 while (its--) { 2284 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2285 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2286 2287 /* update rhs: bb1 = bb - B*x */ 2288 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2289 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2290 2291 /* local sweep */ 2292 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2293 } 2294 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2295 if (flag & SOR_ZERO_INITIAL_GUESS) { 2296 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2297 its--; 2298 } 2299 while (its--) { 2300 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2301 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2302 2303 /* update rhs: bb1 = bb - B*x */ 2304 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2305 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2306 2307 /* local sweep */ 2308 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2309 } 2310 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2311 2312 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2317 { 2318 PetscErrorCode ierr; 2319 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2320 PetscInt m,N,i,*garray = aij->garray; 2321 PetscInt ib,jb,bs = A->rmap->bs; 2322 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2323 MatScalar *a_val = a_aij->a; 2324 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2325 MatScalar *b_val = b_aij->a; 2326 PetscReal *work; 2327 2328 PetscFunctionBegin; 2329 ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr); 2330 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2331 if (type == NORM_2) { 2332 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2333 for (jb=0; jb<bs; jb++) { 2334 for (ib=0; ib<bs; ib++) { 2335 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2336 a_val++; 2337 } 2338 } 2339 } 2340 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2341 for (jb=0; jb<bs; jb++) { 2342 for (ib=0; ib<bs; ib++) { 2343 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2344 b_val++; 2345 } 2346 } 2347 } 2348 } else if (type == NORM_1) { 2349 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2350 for (jb=0; jb<bs; jb++) { 2351 for (ib=0; ib<bs; ib++) { 2352 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2353 a_val++; 2354 } 2355 } 2356 } 2357 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2358 for (jb=0; jb<bs; jb++) { 2359 for (ib=0; ib<bs; ib++) { 2360 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2361 b_val++; 2362 } 2363 } 2364 } 2365 } else if (type == NORM_INFINITY) { 2366 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2367 for (jb=0; jb<bs; jb++) { 2368 for (ib=0; ib<bs; ib++) { 2369 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2370 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2371 a_val++; 2372 } 2373 } 2374 } 2375 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2376 for (jb=0; jb<bs; jb++) { 2377 for (ib=0; ib<bs; ib++) { 2378 int col = garray[b_aij->j[i]] * bs + jb; 2379 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2380 b_val++; 2381 } 2382 } 2383 } 2384 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2385 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2386 for (jb=0; jb<bs; jb++) { 2387 for (ib=0; ib<bs; ib++) { 2388 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2389 a_val++; 2390 } 2391 } 2392 } 2393 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2394 for (jb=0; jb<bs; jb++) { 2395 for (ib=0; ib<bs; ib++) { 2396 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2397 b_val++; 2398 } 2399 } 2400 } 2401 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2402 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2403 for (jb=0; jb<bs; jb++) { 2404 for (ib=0; ib<bs; ib++) { 2405 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2406 a_val++; 2407 } 2408 } 2409 } 2410 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2411 for (jb=0; jb<bs; jb++) { 2412 for (ib=0; ib<bs; ib++) { 2413 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2414 b_val++; 2415 } 2416 } 2417 } 2418 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2419 if (type == NORM_INFINITY) { 2420 ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2421 } else { 2422 ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2423 } 2424 ierr = PetscFree(work);CHKERRQ(ierr); 2425 if (type == NORM_2) { 2426 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2427 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2428 for (i=0; i<N; i++) reductions[i] /= m; 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 2433 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2434 { 2435 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2440 A->factorerrortype = a->A->factorerrortype; 2441 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2442 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2443 PetscFunctionReturn(0); 2444 } 2445 2446 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2447 { 2448 PetscErrorCode ierr; 2449 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2450 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2451 2452 PetscFunctionBegin; 2453 if (!Y->preallocated) { 2454 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2455 } else if (!aij->nz) { 2456 PetscInt nonew = aij->nonew; 2457 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2458 aij->nonew = nonew; 2459 } 2460 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2465 { 2466 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2467 PetscErrorCode ierr; 2468 2469 PetscFunctionBegin; 2470 PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2471 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2472 if (d) { 2473 PetscInt rstart; 2474 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2475 *d += rstart/A->rmap->bs; 2476 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2482 { 2483 PetscFunctionBegin; 2484 *a = ((Mat_MPIBAIJ*)A->data)->A; 2485 PetscFunctionReturn(0); 2486 } 2487 2488 /* -------------------------------------------------------------------*/ 2489 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2490 MatGetRow_MPIBAIJ, 2491 MatRestoreRow_MPIBAIJ, 2492 MatMult_MPIBAIJ, 2493 /* 4*/ MatMultAdd_MPIBAIJ, 2494 MatMultTranspose_MPIBAIJ, 2495 MatMultTransposeAdd_MPIBAIJ, 2496 NULL, 2497 NULL, 2498 NULL, 2499 /*10*/ NULL, 2500 NULL, 2501 NULL, 2502 MatSOR_MPIBAIJ, 2503 MatTranspose_MPIBAIJ, 2504 /*15*/ MatGetInfo_MPIBAIJ, 2505 MatEqual_MPIBAIJ, 2506 MatGetDiagonal_MPIBAIJ, 2507 MatDiagonalScale_MPIBAIJ, 2508 MatNorm_MPIBAIJ, 2509 /*20*/ MatAssemblyBegin_MPIBAIJ, 2510 MatAssemblyEnd_MPIBAIJ, 2511 MatSetOption_MPIBAIJ, 2512 MatZeroEntries_MPIBAIJ, 2513 /*24*/ MatZeroRows_MPIBAIJ, 2514 NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 /*29*/ MatSetUp_MPIBAIJ, 2519 NULL, 2520 NULL, 2521 MatGetDiagonalBlock_MPIBAIJ, 2522 NULL, 2523 /*34*/ MatDuplicate_MPIBAIJ, 2524 NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 /*39*/ MatAXPY_MPIBAIJ, 2529 MatCreateSubMatrices_MPIBAIJ, 2530 MatIncreaseOverlap_MPIBAIJ, 2531 MatGetValues_MPIBAIJ, 2532 MatCopy_MPIBAIJ, 2533 /*44*/ NULL, 2534 MatScale_MPIBAIJ, 2535 MatShift_MPIBAIJ, 2536 NULL, 2537 MatZeroRowsColumns_MPIBAIJ, 2538 /*49*/ NULL, 2539 NULL, 2540 NULL, 2541 NULL, 2542 NULL, 2543 /*54*/ MatFDColoringCreate_MPIXAIJ, 2544 NULL, 2545 MatSetUnfactored_MPIBAIJ, 2546 MatPermute_MPIBAIJ, 2547 MatSetValuesBlocked_MPIBAIJ, 2548 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2549 MatDestroy_MPIBAIJ, 2550 MatView_MPIBAIJ, 2551 NULL, 2552 NULL, 2553 /*64*/ NULL, 2554 NULL, 2555 NULL, 2556 NULL, 2557 NULL, 2558 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2559 NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 /*74*/ NULL, 2564 MatFDColoringApply_BAIJ, 2565 NULL, 2566 NULL, 2567 NULL, 2568 /*79*/ NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 MatLoad_MPIBAIJ, 2573 /*84*/ NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 /*89*/ NULL, 2579 NULL, 2580 NULL, 2581 NULL, 2582 NULL, 2583 /*94*/ NULL, 2584 NULL, 2585 NULL, 2586 NULL, 2587 NULL, 2588 /*99*/ NULL, 2589 NULL, 2590 NULL, 2591 MatConjugate_MPIBAIJ, 2592 NULL, 2593 /*104*/NULL, 2594 MatRealPart_MPIBAIJ, 2595 MatImaginaryPart_MPIBAIJ, 2596 NULL, 2597 NULL, 2598 /*109*/NULL, 2599 NULL, 2600 NULL, 2601 NULL, 2602 MatMissingDiagonal_MPIBAIJ, 2603 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2604 NULL, 2605 MatGetGhosts_MPIBAIJ, 2606 NULL, 2607 NULL, 2608 /*119*/NULL, 2609 NULL, 2610 NULL, 2611 NULL, 2612 MatGetMultiProcBlock_MPIBAIJ, 2613 /*124*/NULL, 2614 MatGetColumnReductions_MPIBAIJ, 2615 MatInvertBlockDiagonal_MPIBAIJ, 2616 NULL, 2617 NULL, 2618 /*129*/ NULL, 2619 NULL, 2620 NULL, 2621 NULL, 2622 NULL, 2623 /*134*/ NULL, 2624 NULL, 2625 NULL, 2626 NULL, 2627 NULL, 2628 /*139*/ MatSetBlockSizes_Default, 2629 NULL, 2630 NULL, 2631 MatFDColoringSetUp_MPIXAIJ, 2632 NULL, 2633 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2634 NULL, 2635 NULL, 2636 NULL 2637 }; 2638 2639 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2640 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2641 2642 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2643 { 2644 PetscInt m,rstart,cstart,cend; 2645 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2646 const PetscInt *JJ =NULL; 2647 PetscScalar *values=NULL; 2648 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2649 PetscErrorCode ierr; 2650 PetscBool nooffprocentries; 2651 2652 PetscFunctionBegin; 2653 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2654 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2655 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2656 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2657 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2658 m = B->rmap->n/bs; 2659 rstart = B->rmap->rstart/bs; 2660 cstart = B->cmap->rstart/bs; 2661 cend = B->cmap->rend/bs; 2662 2663 PetscCheckFalse(ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2664 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2665 for (i=0; i<m; i++) { 2666 nz = ii[i+1] - ii[i]; 2667 PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2668 nz_max = PetscMax(nz_max,nz); 2669 dlen = 0; 2670 olen = 0; 2671 JJ = jj + ii[i]; 2672 for (j=0; j<nz; j++) { 2673 if (*JJ < cstart || *JJ >= cend) olen++; 2674 else dlen++; 2675 JJ++; 2676 } 2677 d_nnz[i] = dlen; 2678 o_nnz[i] = olen; 2679 } 2680 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2681 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2682 2683 values = (PetscScalar*)V; 2684 if (!values) { 2685 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2686 } 2687 for (i=0; i<m; i++) { 2688 PetscInt row = i + rstart; 2689 PetscInt ncols = ii[i+1] - ii[i]; 2690 const PetscInt *icols = jj + ii[i]; 2691 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2692 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2693 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2694 } else { /* block ordering does not match so we can only insert one block at a time. */ 2695 PetscInt j; 2696 for (j=0; j<ncols; j++) { 2697 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2698 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2699 } 2700 } 2701 } 2702 2703 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2704 nooffprocentries = B->nooffprocentries; 2705 B->nooffprocentries = PETSC_TRUE; 2706 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2708 B->nooffprocentries = nooffprocentries; 2709 2710 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 /*@C 2715 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2716 2717 Collective 2718 2719 Input Parameters: 2720 + B - the matrix 2721 . bs - the block size 2722 . i - the indices into j for the start of each local row (starts with zero) 2723 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2724 - v - optional values in the matrix 2725 2726 Level: advanced 2727 2728 Notes: 2729 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2730 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2731 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2732 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2733 block column and the second index is over columns within a block. 2734 2735 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2736 2737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2738 @*/ 2739 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2740 { 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2745 PetscValidType(B,1); 2746 PetscValidLogicalCollectiveInt(B,bs,2); 2747 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2752 { 2753 Mat_MPIBAIJ *b; 2754 PetscErrorCode ierr; 2755 PetscInt i; 2756 PetscMPIInt size; 2757 2758 PetscFunctionBegin; 2759 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2760 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2761 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2762 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2763 2764 if (d_nnz) { 2765 for (i=0; i<B->rmap->n/bs; i++) { 2766 PetscCheckFalse(d_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2767 } 2768 } 2769 if (o_nnz) { 2770 for (i=0; i<B->rmap->n/bs; i++) { 2771 PetscCheckFalse(o_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2772 } 2773 } 2774 2775 b = (Mat_MPIBAIJ*)B->data; 2776 b->bs2 = bs*bs; 2777 b->mbs = B->rmap->n/bs; 2778 b->nbs = B->cmap->n/bs; 2779 b->Mbs = B->rmap->N/bs; 2780 b->Nbs = B->cmap->N/bs; 2781 2782 for (i=0; i<=b->size; i++) { 2783 b->rangebs[i] = B->rmap->range[i]/bs; 2784 } 2785 b->rstartbs = B->rmap->rstart/bs; 2786 b->rendbs = B->rmap->rend/bs; 2787 b->cstartbs = B->cmap->rstart/bs; 2788 b->cendbs = B->cmap->rend/bs; 2789 2790 #if defined(PETSC_USE_CTABLE) 2791 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2792 #else 2793 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2794 #endif 2795 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2796 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2797 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2798 2799 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2800 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2801 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2802 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2803 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2804 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2805 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2806 2807 if (!B->preallocated) { 2808 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2809 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2810 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2811 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2812 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2813 } 2814 2815 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2816 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2817 B->preallocated = PETSC_TRUE; 2818 B->was_assembled = PETSC_FALSE; 2819 B->assembled = PETSC_FALSE; 2820 PetscFunctionReturn(0); 2821 } 2822 2823 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2824 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2825 2826 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2827 { 2828 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2829 PetscErrorCode ierr; 2830 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2831 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2832 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2833 2834 PetscFunctionBegin; 2835 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2836 ii[0] = 0; 2837 for (i=0; i<M; i++) { 2838 PetscCheckFalse((id[i+1] - id[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2839 PetscCheckFalse((io[i+1] - io[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2840 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2841 /* remove one from count of matrix has diagonal */ 2842 for (j=id[i]; j<id[i+1]; j++) { 2843 if (jd[j] == i) {ii[i+1]--;break;} 2844 } 2845 } 2846 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2847 cnt = 0; 2848 for (i=0; i<M; i++) { 2849 for (j=io[i]; j<io[i+1]; j++) { 2850 if (garray[jo[j]] > rstart) break; 2851 jj[cnt++] = garray[jo[j]]; 2852 } 2853 for (k=id[i]; k<id[i+1]; k++) { 2854 if (jd[k] != i) { 2855 jj[cnt++] = rstart + jd[k]; 2856 } 2857 } 2858 for (; j<io[i+1]; j++) { 2859 jj[cnt++] = garray[jo[j]]; 2860 } 2861 } 2862 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2867 2868 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2869 2870 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2871 { 2872 PetscErrorCode ierr; 2873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2874 Mat B; 2875 Mat_MPIAIJ *b; 2876 2877 PetscFunctionBegin; 2878 PetscCheckFalse(!A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2879 2880 if (reuse == MAT_REUSE_MATRIX) { 2881 B = *newmat; 2882 } else { 2883 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2884 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2885 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2886 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2887 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2888 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2889 } 2890 b = (Mat_MPIAIJ*) B->data; 2891 2892 if (reuse == MAT_REUSE_MATRIX) { 2893 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2894 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2895 } else { 2896 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2897 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2898 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2899 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2900 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2901 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 } 2904 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 2907 if (reuse == MAT_INPLACE_MATRIX) { 2908 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2909 } else { 2910 *newmat = B; 2911 } 2912 PetscFunctionReturn(0); 2913 } 2914 2915 /*MC 2916 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2917 2918 Options Database Keys: 2919 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2920 . -mat_block_size <bs> - set the blocksize used to store the matrix 2921 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2922 - -mat_use_hash_table <fact> 2923 2924 Level: beginner 2925 2926 Notes: 2927 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 2928 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2929 2930 .seealso: MatCreateBAIJ 2931 M*/ 2932 2933 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2934 2935 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2936 { 2937 Mat_MPIBAIJ *b; 2938 PetscErrorCode ierr; 2939 PetscBool flg = PETSC_FALSE; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2943 B->data = (void*)b; 2944 2945 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2946 B->assembled = PETSC_FALSE; 2947 2948 B->insertmode = NOT_SET_VALUES; 2949 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 2950 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr); 2951 2952 /* build local table of row and column ownerships */ 2953 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2954 2955 /* build cache for off array entries formed */ 2956 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2957 2958 b->donotstash = PETSC_FALSE; 2959 b->colmap = NULL; 2960 b->garray = NULL; 2961 b->roworiented = PETSC_TRUE; 2962 2963 /* stuff used in block assembly */ 2964 b->barray = NULL; 2965 2966 /* stuff used for matrix vector multiply */ 2967 b->lvec = NULL; 2968 b->Mvctx = NULL; 2969 2970 /* stuff for MatGetRow() */ 2971 b->rowindices = NULL; 2972 b->rowvalues = NULL; 2973 b->getrowactive = PETSC_FALSE; 2974 2975 /* hash table stuff */ 2976 b->ht = NULL; 2977 b->hd = NULL; 2978 b->ht_size = 0; 2979 b->ht_flag = PETSC_FALSE; 2980 b->ht_fact = 0; 2981 b->ht_total_ct = 0; 2982 b->ht_insert_ct = 0; 2983 2984 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2985 b->ijonly = PETSC_FALSE; 2986 2987 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2988 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2989 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2990 #if defined(PETSC_HAVE_HYPRE) 2991 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2992 #endif 2993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3000 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3001 3002 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3003 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 3004 if (flg) { 3005 PetscReal fact = 1.39; 3006 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3007 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3008 if (fact <= 1.0) fact = 1.39; 3009 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3010 ierr = PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);CHKERRQ(ierr); 3011 } 3012 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 /*MC 3017 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3018 3019 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3020 and MATMPIBAIJ otherwise. 3021 3022 Options Database Keys: 3023 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3024 3025 Level: beginner 3026 3027 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3028 M*/ 3029 3030 /*@C 3031 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3032 (block compressed row). For good matrix assembly performance 3033 the user should preallocate the matrix storage by setting the parameters 3034 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3035 performance can be increased by more than a factor of 50. 3036 3037 Collective on Mat 3038 3039 Input Parameters: 3040 + B - the matrix 3041 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3042 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3043 . d_nz - number of block nonzeros per block row in diagonal portion of local 3044 submatrix (same for all local rows) 3045 . d_nnz - array containing the number of block nonzeros in the various block rows 3046 of the in diagonal portion of the local (possibly different for each block 3047 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3048 set it even if it is zero. 3049 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3050 submatrix (same for all local rows). 3051 - o_nnz - array containing the number of nonzeros in the various block rows of the 3052 off-diagonal portion of the local submatrix (possibly different for 3053 each block row) or NULL. 3054 3055 If the *_nnz parameter is given then the *_nz parameter is ignored 3056 3057 Options Database Keys: 3058 + -mat_block_size - size of the blocks to use 3059 - -mat_use_hash_table <fact> 3060 3061 Notes: 3062 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3063 than it must be used on all processors that share the object for that argument. 3064 3065 Storage Information: 3066 For a square global matrix we define each processor's diagonal portion 3067 to be its local rows and the corresponding columns (a square submatrix); 3068 each processor's off-diagonal portion encompasses the remainder of the 3069 local matrix (a rectangular submatrix). 3070 3071 The user can specify preallocated storage for the diagonal part of 3072 the local submatrix with either d_nz or d_nnz (not both). Set 3073 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3074 memory allocation. Likewise, specify preallocated storage for the 3075 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3076 3077 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3078 the figure below we depict these three local rows and all columns (0-11). 3079 3080 .vb 3081 0 1 2 3 4 5 6 7 8 9 10 11 3082 -------------------------- 3083 row 3 |o o o d d d o o o o o o 3084 row 4 |o o o d d d o o o o o o 3085 row 5 |o o o d d d o o o o o o 3086 -------------------------- 3087 .ve 3088 3089 Thus, any entries in the d locations are stored in the d (diagonal) 3090 submatrix, and any entries in the o locations are stored in the 3091 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3092 stored simply in the MATSEQBAIJ format for compressed row storage. 3093 3094 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3095 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3096 In general, for PDE problems in which most nonzeros are near the diagonal, 3097 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3098 or you will get TERRIBLE performance; see the users' manual chapter on 3099 matrices. 3100 3101 You can call MatGetInfo() to get information on how effective the preallocation was; 3102 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3103 You can also run with the option -info and look for messages with the string 3104 malloc in them to see if additional memory allocation was needed. 3105 3106 Level: intermediate 3107 3108 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3109 @*/ 3110 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3111 { 3112 PetscErrorCode ierr; 3113 3114 PetscFunctionBegin; 3115 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3116 PetscValidType(B,1); 3117 PetscValidLogicalCollectiveInt(B,bs,2); 3118 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3119 PetscFunctionReturn(0); 3120 } 3121 3122 /*@C 3123 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3124 (block compressed row). For good matrix assembly performance 3125 the user should preallocate the matrix storage by setting the parameters 3126 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3127 performance can be increased by more than a factor of 50. 3128 3129 Collective 3130 3131 Input Parameters: 3132 + comm - MPI communicator 3133 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3134 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3135 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3136 This value should be the same as the local size used in creating the 3137 y vector for the matrix-vector product y = Ax. 3138 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3139 This value should be the same as the local size used in creating the 3140 x vector for the matrix-vector product y = Ax. 3141 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3142 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3143 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3144 submatrix (same for all local rows) 3145 . d_nnz - array containing the number of nonzero blocks in the various block rows 3146 of the in diagonal portion of the local (possibly different for each block 3147 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3148 and set it even if it is zero. 3149 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3150 submatrix (same for all local rows). 3151 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3152 off-diagonal portion of the local submatrix (possibly different for 3153 each block row) or NULL. 3154 3155 Output Parameter: 3156 . A - the matrix 3157 3158 Options Database Keys: 3159 + -mat_block_size - size of the blocks to use 3160 - -mat_use_hash_table <fact> 3161 3162 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3163 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3164 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3165 3166 Notes: 3167 If the *_nnz parameter is given then the *_nz parameter is ignored 3168 3169 A nonzero block is any block that as 1 or more nonzeros in it 3170 3171 The user MUST specify either the local or global matrix dimensions 3172 (possibly both). 3173 3174 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3175 than it must be used on all processors that share the object for that argument. 3176 3177 Storage Information: 3178 For a square global matrix we define each processor's diagonal portion 3179 to be its local rows and the corresponding columns (a square submatrix); 3180 each processor's off-diagonal portion encompasses the remainder of the 3181 local matrix (a rectangular submatrix). 3182 3183 The user can specify preallocated storage for the diagonal part of 3184 the local submatrix with either d_nz or d_nnz (not both). Set 3185 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3186 memory allocation. Likewise, specify preallocated storage for the 3187 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3188 3189 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3190 the figure below we depict these three local rows and all columns (0-11). 3191 3192 .vb 3193 0 1 2 3 4 5 6 7 8 9 10 11 3194 -------------------------- 3195 row 3 |o o o d d d o o o o o o 3196 row 4 |o o o d d d o o o o o o 3197 row 5 |o o o d d d o o o o o o 3198 -------------------------- 3199 .ve 3200 3201 Thus, any entries in the d locations are stored in the d (diagonal) 3202 submatrix, and any entries in the o locations are stored in the 3203 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3204 stored simply in the MATSEQBAIJ format for compressed row storage. 3205 3206 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3207 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3208 In general, for PDE problems in which most nonzeros are near the diagonal, 3209 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3210 or you will get TERRIBLE performance; see the users' manual chapter on 3211 matrices. 3212 3213 Level: intermediate 3214 3215 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3216 @*/ 3217 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3218 { 3219 PetscErrorCode ierr; 3220 PetscMPIInt size; 3221 3222 PetscFunctionBegin; 3223 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3224 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3225 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3226 if (size > 1) { 3227 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3228 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3229 } else { 3230 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3231 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3232 } 3233 PetscFunctionReturn(0); 3234 } 3235 3236 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3237 { 3238 Mat mat; 3239 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3240 PetscErrorCode ierr; 3241 PetscInt len=0; 3242 3243 PetscFunctionBegin; 3244 *newmat = NULL; 3245 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3246 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3247 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3248 3249 mat->factortype = matin->factortype; 3250 mat->preallocated = PETSC_TRUE; 3251 mat->assembled = PETSC_TRUE; 3252 mat->insertmode = NOT_SET_VALUES; 3253 3254 a = (Mat_MPIBAIJ*)mat->data; 3255 mat->rmap->bs = matin->rmap->bs; 3256 a->bs2 = oldmat->bs2; 3257 a->mbs = oldmat->mbs; 3258 a->nbs = oldmat->nbs; 3259 a->Mbs = oldmat->Mbs; 3260 a->Nbs = oldmat->Nbs; 3261 3262 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3263 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3264 3265 a->size = oldmat->size; 3266 a->rank = oldmat->rank; 3267 a->donotstash = oldmat->donotstash; 3268 a->roworiented = oldmat->roworiented; 3269 a->rowindices = NULL; 3270 a->rowvalues = NULL; 3271 a->getrowactive = PETSC_FALSE; 3272 a->barray = NULL; 3273 a->rstartbs = oldmat->rstartbs; 3274 a->rendbs = oldmat->rendbs; 3275 a->cstartbs = oldmat->cstartbs; 3276 a->cendbs = oldmat->cendbs; 3277 3278 /* hash table stuff */ 3279 a->ht = NULL; 3280 a->hd = NULL; 3281 a->ht_size = 0; 3282 a->ht_flag = oldmat->ht_flag; 3283 a->ht_fact = oldmat->ht_fact; 3284 a->ht_total_ct = 0; 3285 a->ht_insert_ct = 0; 3286 3287 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3288 if (oldmat->colmap) { 3289 #if defined(PETSC_USE_CTABLE) 3290 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3291 #else 3292 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3293 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3294 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3295 #endif 3296 } else a->colmap = NULL; 3297 3298 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3299 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3300 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3301 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3302 } else a->garray = NULL; 3303 3304 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3305 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3306 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3307 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3308 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3309 3310 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3312 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3314 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3315 *newmat = mat; 3316 PetscFunctionReturn(0); 3317 } 3318 3319 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3320 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3321 { 3322 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3323 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3324 PetscScalar *matvals; 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3329 3330 /* read in matrix header */ 3331 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3332 PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3333 M = header[1]; N = header[2]; nz = header[3]; 3334 PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3335 PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3336 PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3337 3338 /* set block sizes from the viewer's .info file */ 3339 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3340 /* set local sizes if not set already */ 3341 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3342 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3343 /* set global sizes if not set already */ 3344 if (mat->rmap->N < 0) mat->rmap->N = M; 3345 if (mat->cmap->N < 0) mat->cmap->N = N; 3346 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3347 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3348 3349 /* check if the matrix sizes are correct */ 3350 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3351 PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3352 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3353 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3354 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr); 3355 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr); 3356 mbs = m/bs; nbs = n/bs; 3357 3358 /* read in row lengths and build row indices */ 3359 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3360 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3361 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3362 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 3363 PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3364 3365 /* read in column indices and matrix values */ 3366 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3367 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3368 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3369 3370 { /* preallocate matrix storage */ 3371 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3372 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3373 PetscBool sbaij,done; 3374 PetscInt *d_nnz,*o_nnz; 3375 3376 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3377 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3378 ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3379 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3380 for (i=0; i<mbs; i++) { 3381 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3382 ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3383 for (k=0; k<bs; k++) { 3384 PetscInt row = bs*i + k; 3385 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3386 PetscInt col = colidxs[j]; 3387 if (!sbaij || col >= row) { 3388 if (col >= cs && col < ce) { 3389 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3390 } else { 3391 ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3392 if (done) o_nnz[i]++; 3393 } 3394 } 3395 } 3396 } 3397 } 3398 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3399 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3400 ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3401 ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3402 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3403 } 3404 3405 /* store matrix values */ 3406 for (i=0; i<m; i++) { 3407 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3408 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3409 } 3410 3411 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3412 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3413 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3414 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 3418 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3419 { 3420 PetscErrorCode ierr; 3421 PetscBool isbinary; 3422 3423 PetscFunctionBegin; 3424 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3425 PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3426 ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 3430 /*@ 3431 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3432 3433 Input Parameters: 3434 + mat - the matrix 3435 - fact - factor 3436 3437 Not Collective, each process can use a different factor 3438 3439 Level: advanced 3440 3441 Notes: 3442 This can also be set by the command line option: -mat_use_hash_table <fact> 3443 3444 .seealso: MatSetOption() 3445 @*/ 3446 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3447 { 3448 PetscErrorCode ierr; 3449 3450 PetscFunctionBegin; 3451 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3452 PetscFunctionReturn(0); 3453 } 3454 3455 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3456 { 3457 Mat_MPIBAIJ *baij; 3458 3459 PetscFunctionBegin; 3460 baij = (Mat_MPIBAIJ*)mat->data; 3461 baij->ht_fact = fact; 3462 PetscFunctionReturn(0); 3463 } 3464 3465 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3466 { 3467 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3468 PetscBool flg; 3469 PetscErrorCode ierr; 3470 3471 PetscFunctionBegin; 3472 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 3473 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3474 if (Ad) *Ad = a->A; 3475 if (Ao) *Ao = a->B; 3476 if (colmap) *colmap = a->garray; 3477 PetscFunctionReturn(0); 3478 } 3479 3480 /* 3481 Special version for direct calls from Fortran (to eliminate two function call overheads 3482 */ 3483 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3484 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3485 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3486 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3487 #endif 3488 3489 /*@C 3490 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3491 3492 Collective on Mat 3493 3494 Input Parameters: 3495 + mat - the matrix 3496 . min - number of input rows 3497 . im - input rows 3498 . nin - number of input columns 3499 . in - input columns 3500 . v - numerical values input 3501 - addvin - INSERT_VALUES or ADD_VALUES 3502 3503 Notes: 3504 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3505 3506 Level: advanced 3507 3508 .seealso: MatSetValuesBlocked() 3509 @*/ 3510 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3511 { 3512 /* convert input arguments to C version */ 3513 Mat mat = *matin; 3514 PetscInt m = *min, n = *nin; 3515 InsertMode addv = *addvin; 3516 3517 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3518 const MatScalar *value; 3519 MatScalar *barray = baij->barray; 3520 PetscBool roworiented = baij->roworiented; 3521 PetscErrorCode ierr; 3522 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3523 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3524 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3525 3526 PetscFunctionBegin; 3527 /* tasks normally handled by MatSetValuesBlocked() */ 3528 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3529 else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3530 PetscCheckFalse(mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3531 if (mat->assembled) { 3532 mat->was_assembled = PETSC_TRUE; 3533 mat->assembled = PETSC_FALSE; 3534 } 3535 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3536 3537 if (!barray) { 3538 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3539 baij->barray = barray; 3540 } 3541 3542 if (roworiented) stepval = (n-1)*bs; 3543 else stepval = (m-1)*bs; 3544 3545 for (i=0; i<m; i++) { 3546 if (im[i] < 0) continue; 3547 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3548 if (im[i] >= rstart && im[i] < rend) { 3549 row = im[i] - rstart; 3550 for (j=0; j<n; j++) { 3551 /* If NumCol = 1 then a copy is not required */ 3552 if ((roworiented) && (n == 1)) { 3553 barray = (MatScalar*)v + i*bs2; 3554 } else if ((!roworiented) && (m == 1)) { 3555 barray = (MatScalar*)v + j*bs2; 3556 } else { /* Here a copy is required */ 3557 if (roworiented) { 3558 value = v + i*(stepval+bs)*bs + j*bs; 3559 } else { 3560 value = v + j*(stepval+bs)*bs + i*bs; 3561 } 3562 for (ii=0; ii<bs; ii++,value+=stepval) { 3563 for (jj=0; jj<bs; jj++) { 3564 *barray++ = *value++; 3565 } 3566 } 3567 barray -=bs2; 3568 } 3569 3570 if (in[j] >= cstart && in[j] < cend) { 3571 col = in[j] - cstart; 3572 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3573 } else if (in[j] < 0) continue; 3574 else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3575 else { 3576 if (mat->was_assembled) { 3577 if (!baij->colmap) { 3578 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3579 } 3580 3581 #if defined(PETSC_USE_DEBUG) 3582 #if defined(PETSC_USE_CTABLE) 3583 { PetscInt data; 3584 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3585 PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3586 } 3587 #else 3588 PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3589 #endif 3590 #endif 3591 #if defined(PETSC_USE_CTABLE) 3592 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3593 col = (col - 1)/bs; 3594 #else 3595 col = (baij->colmap[in[j]] - 1)/bs; 3596 #endif 3597 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3598 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3599 col = in[j]; 3600 } 3601 } else col = in[j]; 3602 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3603 } 3604 } 3605 } else { 3606 if (!baij->donotstash) { 3607 if (roworiented) { 3608 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3609 } else { 3610 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3611 } 3612 } 3613 } 3614 } 3615 3616 /* task normally handled by MatSetValuesBlocked() */ 3617 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3618 PetscFunctionReturn(0); 3619 } 3620 3621 /*@ 3622 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3623 CSR format the local rows. 3624 3625 Collective 3626 3627 Input Parameters: 3628 + comm - MPI communicator 3629 . bs - the block size, only a block size of 1 is supported 3630 . m - number of local rows (Cannot be PETSC_DECIDE) 3631 . n - This value should be the same as the local size used in creating the 3632 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3633 calculated if N is given) For square matrices n is almost always m. 3634 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3635 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3636 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3637 . j - column indices 3638 - a - matrix values 3639 3640 Output Parameter: 3641 . mat - the matrix 3642 3643 Level: intermediate 3644 3645 Notes: 3646 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3647 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3648 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3649 3650 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3651 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3652 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3653 with column-major ordering within blocks. 3654 3655 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3656 3657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3658 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3659 @*/ 3660 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3661 { 3662 PetscErrorCode ierr; 3663 3664 PetscFunctionBegin; 3665 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3666 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3667 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3668 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3669 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3670 ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 3671 ierr = MatSetUp(*mat);CHKERRQ(ierr); 3672 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3673 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3674 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3675 PetscFunctionReturn(0); 3676 } 3677 3678 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3679 { 3680 PetscErrorCode ierr; 3681 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3682 PetscInt *indx; 3683 PetscScalar *values; 3684 3685 PetscFunctionBegin; 3686 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3687 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3689 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3690 PetscInt *bindx,rmax=a->rmax,j; 3691 PetscMPIInt rank,size; 3692 3693 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3694 mbs = m/bs; Nbs = N/cbs; 3695 if (n == PETSC_DECIDE) { 3696 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr); 3697 } 3698 nbs = n/cbs; 3699 3700 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3701 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3702 3703 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3704 ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr); 3705 if (rank == size-1) { 3706 /* Check sum(nbs) = Nbs */ 3707 PetscCheckFalse(__end != Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3708 } 3709 3710 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3711 for (i=0; i<mbs; i++) { 3712 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3713 nnz = nnz/bs; 3714 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3715 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3716 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(bindx);CHKERRQ(ierr); 3719 3720 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3721 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3722 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3723 ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 3724 ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3725 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3726 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3727 ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3728 } 3729 3730 /* numeric phase */ 3731 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3732 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3733 3734 for (i=0; i<m; i++) { 3735 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3736 Ii = i + rstart; 3737 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3738 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3739 } 3740 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 PetscFunctionReturn(0); 3743 } 3744