1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 #include <petscsf.h> 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 18 { 19 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt i,*idxb = 0; 22 PetscScalar *va,*vb; 23 Vec vtmp; 24 25 PetscFunctionBegin; 26 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 27 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 28 if (idx) { 29 for (i=0; i<A->rmap->n; i++) { 30 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 31 } 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++) { 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 41 va[i] = vb[i]; 42 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 43 } 44 } 45 46 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 47 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 48 ierr = PetscFree(idxb);CHKERRQ(ierr); 49 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 
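   (With PETSC_USE_CTABLE a PetscTable keyed on the global block column is
   used instead, which avoids storing an entry for every block column of the
   global matrix.)

   A sketch only, with gcol an illustrative global point-column index and the
   other names mirroring the fields used below: the lookup done in
   MatSetValues_MPIBAIJ() amounts to

      bcol = gcol/bs;                    global block column
      loc  = colmap[bcol] - 1;           0-based point offset into B, -1 if absent
      if (loc >= 0) loc += gcol % bs;    local point column within B

   The +1/-1 shift lets a stored value of 0 mean "this block column has no
   entries on this process".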
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined(PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba =b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray = baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) stepval = (n-1)*bs; 296 else stepval = (m-1)*bs; 297 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend) { 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } else if (in[j] < 0) continue; 328 #if defined(PETSC_USE_DEBUG) 329 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 330 #endif 331 else { 332 if (mat->was_assembled) { 333 if (!baij->colmap) { 334 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 335 } 336 337 #if defined(PETSC_USE_DEBUG) 338 #if defined(PETSC_USE_CTABLE) 339 { PetscInt data; 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 341 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 342 } 343 #else 344 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 #endif 346 #endif 347 #if defined(PETSC_USE_CTABLE) 348 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 349 col = (col - 1)/bs; 350 #else 351 col = (baij->colmap[in[j]] - 1)/bs; 352 #endif 353 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 354 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 355 col = in[j]; 356 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 357 } else col = in[j]; 358 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 359 } 360 } 361 } else { 362 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 363 if (!baij->donotstash) { 364 if (roworiented) { 365 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 366 } else { 367 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 375 #define HASH_KEY 0.6180339887 376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 384 PetscBool roworiented = baij->roworiented; 385 PetscErrorCode ierr; 386 PetscInt i,j,row,col; 387 PetscInt rstart_orig=mat->rmap->rstart; 388 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 389 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 390 PetscReal tmp; 391 MatScalar **HD = baij->hd,value; 392 #if defined(PETSC_USE_DEBUG) 393 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 394 #endif 395 396 PetscFunctionBegin; 397 for (i=0; i<m; i++) { 398 #if defined(PETSC_USE_DEBUG) 399 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 400 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 401 #endif 402 row = im[i]; 403 if (row >= rstart_orig && row < rend_orig) { 404 for (j=0; j<n; j++) { 405 col = in[j]; 406 if (roworiented) value = v[i*n+j]; 407 else value = v[i+j*m]; 408 /* Look up PetscInto the Hash Table */ 409 key = (row/bs)*Nbs+(col/bs)+1; 410 h1 = HASH(size,key,tmp); 411 412 413 idx = h1; 414 #if defined(PETSC_USE_DEBUG) 415 insert_ct++; 416 total_ct++; 417 if (HT[idx] != key) { 418 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 419 if (idx == size) { 420 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 427 if (idx == size) { 428 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 429 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 430 } 431 } 432 #endif 433 /* A HASH table entry is found, so insert the values at the correct address */ 434 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 435 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 436 } 437 } else if (!baij->donotstash) { 438 if (roworiented) { 439 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 440 } else { 441 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 442 } 443 } 444 } 445 #if defined(PETSC_USE_DEBUG) 446 baij->ht_total_ct = total_ct; 447 baij->ht_insert_ct = insert_ct; 448 #endif 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 457 PetscBool roworiented = 
baij->roworiented; 458 PetscErrorCode ierr; 459 PetscInt i,j,ii,jj,row,col; 460 PetscInt rstart=baij->rstartbs; 461 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 462 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 463 PetscReal tmp; 464 MatScalar **HD = baij->hd,*baij_a; 465 const PetscScalar *v_t,*value; 466 #if defined(PETSC_USE_DEBUG) 467 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 468 #endif 469 470 PetscFunctionBegin; 471 if (roworiented) stepval = (n-1)*bs; 472 else stepval = (m-1)*bs; 473 474 for (i=0; i<m; i++) { 475 #if defined(PETSC_USE_DEBUG) 476 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 477 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 478 #endif 479 row = im[i]; 480 v_t = v + i*nbs2; 481 if (row >= rstart && row < rend) { 482 for (j=0; j<n; j++) { 483 col = in[j]; 484 485 /* Look up into the Hash Table */ 486 key = row*Nbs+col+1; 487 h1 = HASH(size,key,tmp); 488 489 idx = h1; 490 #if defined(PETSC_USE_DEBUG) 491 total_ct++; 492 insert_ct++; 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #else 501 if (HT[idx] != key) { 502 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 503 if (idx == size) { 504 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 505 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 506 } 507 } 508 #endif 509 baij_a = HD[idx]; 510 if (roworiented) { 511 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 512 /* value = v + (i*(stepval+bs)+j)*bs; */ 513 value = v_t; 514 v_t += bs; 515 if (addv == ADD_VALUES) { 516 for (ii=0; ii<bs; ii++,value+=stepval) { 517 for (jj=ii; jj<bs2; jj+=bs) { 518 baij_a[jj] += *value++; 519 } 520 } 521 } else { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] = *value++; 525 } 526 } 527 } 528 } else { 529 value = v + j*(stepval+bs)*bs + i*bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 532 for (jj=0; jj<bs; jj++) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 #if defined(PETSC_USE_DEBUG) 556 baij->ht_total_ct = total_ct; 557 baij->ht_insert_ct = insert_ct; 558 #endif 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatGetValues_MPIBAIJ" 564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscErrorCode ierr; 568 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 569 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 570 571 PetscFunctionBegin; 572 for (i=0; 
i<m; i++) { 573 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 574 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 575 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 576 row = idxm[i] - bsrstart; 577 for (j=0; j<n; j++) { 578 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 579 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 580 if (idxn[j] >= bscstart && idxn[j] < bscend) { 581 col = idxn[j] - bscstart; 582 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 583 } else { 584 if (!baij->colmap) { 585 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 586 } 587 #if defined(PETSC_USE_CTABLE) 588 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 589 data--; 590 #else 591 data = baij->colmap[idxn[j]/bs]-1; 592 #endif 593 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 594 else { 595 col = data + idxn[j]%bs; 596 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 597 } 598 } 599 } 600 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatNorm_MPIBAIJ" 607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 608 { 609 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 610 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 611 PetscErrorCode ierr; 612 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 613 PetscReal sum = 0.0; 614 MatScalar *v; 615 616 PetscFunctionBegin; 617 if (baij->size == 1) { 618 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 619 } else { 620 if (type == NORM_FROBENIUS) { 621 v = amat->a; 622 nz = amat->nz*bs2; 623 for (i=0; i<nz; i++) { 624 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 625 } 626 v = bmat->a; 627 nz = bmat->nz*bs2; 628 for (i=0; i<nz; i++) { 629 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 630 } 631 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 632 *nrm = PetscSqrtReal(*nrm); 633 } else if (type == NORM_1) { /* max column sum */ 634 PetscReal *tmp,*tmp2; 635 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 636 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 637 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 638 v = amat->a; jj = amat->j; 639 for (i=0; i<amat->nz; i++) { 640 for (j=0; j<bs; j++) { 641 col = bs*(cstart + *jj) + j; /* column index */ 642 for (row=0; row<bs; row++) { 643 tmp[col] += PetscAbsScalar(*v); v++; 644 } 645 } 646 jj++; 647 } 648 v = bmat->a; jj = bmat->j; 649 for (i=0; i<bmat->nz; i++) { 650 for (j=0; j<bs; j++) { 651 col = bs*garray[*jj] + j; 652 for (row=0; row<bs; row++) { 653 tmp[col] += PetscAbsScalar(*v); v++; 654 } 655 } 656 jj++; 657 } 658 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 659 *nrm = 0.0; 660 for (j=0; j<mat->cmap->N; j++) { 661 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 662 } 663 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 664 } else if (type == NORM_INFINITY) { /* max row sum */ 665 PetscReal *sums; 666 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 667 sum = 0.0; 668 for 
(j=0; j<amat->mbs; j++) { 669 for (row=0; row<bs; row++) sums[row] = 0.0; 670 v = amat->a + bs2*amat->i[j]; 671 nz = amat->i[j+1]-amat->i[j]; 672 for (i=0; i<nz; i++) { 673 for (col=0; col<bs; col++) { 674 for (row=0; row<bs; row++) { 675 sums[row] += PetscAbsScalar(*v); v++; 676 } 677 } 678 } 679 v = bmat->a + bs2*bmat->i[j]; 680 nz = bmat->i[j+1]-bmat->i[j]; 681 for (i=0; i<nz; i++) { 682 for (col=0; col<bs; col++) { 683 for (row=0; row<bs; row++) { 684 sums[row] += PetscAbsScalar(*v); v++; 685 } 686 } 687 } 688 for (row=0; row<bs; row++) { 689 if (sums[row] > sum) sum = sums[row]; 690 } 691 } 692 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 693 ierr = PetscFree(sums);CHKERRQ(ierr); 694 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 695 } 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 Creates the hash table, and sets the table 701 This table is created only once. 702 If new entried need to be added to the matrix 703 then the hash table has to be destroyed and 704 recreated. 705 */ 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 709 { 710 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 711 Mat A = baij->A,B=baij->B; 712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 713 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 714 PetscErrorCode ierr; 715 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 716 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 717 PetscInt *HT,key; 718 MatScalar **HD; 719 PetscReal tmp; 720 #if defined(PETSC_USE_INFO) 721 PetscInt ct=0,max=0; 722 #endif 723 724 PetscFunctionBegin; 725 if (baij->ht) PetscFunctionReturn(0); 726 727 baij->ht_size = (PetscInt)(factor*nz); 728 ht_size = baij->ht_size; 729 730 /* Allocate Memory for Hash Table */ 731 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 732 HD = baij->hd; 733 HT = baij->ht; 734 735 /* Loop Over A */ 736 for (i=0; i<a->mbs; i++) { 737 for (j=ai[i]; j<ai[i+1]; j++) { 738 row = i+rstart; 739 col = aj[j]+cstart; 740 741 key = row*Nbs + col + 1; 742 h1 = HASH(ht_size,key,tmp); 743 for (k=0; k<ht_size; k++) { 744 if (!HT[(h1+k)%ht_size]) { 745 HT[(h1+k)%ht_size] = key; 746 HD[(h1+k)%ht_size] = a->a + j*bs2; 747 break; 748 #if defined(PETSC_USE_INFO) 749 } else { 750 ct++; 751 #endif 752 } 753 } 754 #if defined(PETSC_USE_INFO) 755 if (k> max) max = k; 756 #endif 757 } 758 } 759 /* Loop Over B */ 760 for (i=0; i<b->mbs; i++) { 761 for (j=bi[i]; j<bi[i+1]; j++) { 762 row = i+rstart; 763 col = garray[bj[j]]; 764 key = row*Nbs + col + 1; 765 h1 = HASH(ht_size,key,tmp); 766 for (k=0; k<ht_size; k++) { 767 if (!HT[(h1+k)%ht_size]) { 768 HT[(h1+k)%ht_size] = key; 769 HD[(h1+k)%ht_size] = b->a + j*bs2; 770 break; 771 #if defined(PETSC_USE_INFO) 772 } else { 773 ct++; 774 #endif 775 } 776 } 777 #if defined(PETSC_USE_INFO) 778 if (k> max) max = k; 779 #endif 780 } 781 } 782 783 /* Print Summary */ 784 #if defined(PETSC_USE_INFO) 785 for (i=0,j=0; i<ht_size; i++) { 786 if (HT[i]) j++; 787 } 788 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 789 #endif 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 796 { 797 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 798 PetscErrorCode ierr; 799 PetscInt nstash,reallocs; 800 InsertMode addv; 801 802 PetscFunctionBegin; 803 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 804 805 /* make sure all processors are either in INSERTMODE or ADDMODE */ 806 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 807 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 808 mat->insertmode = addv; /* in case this processor had no cache */ 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 812 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 813 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 822 { 823 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 824 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 825 PetscErrorCode ierr; 826 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 827 PetscInt *row,*col; 828 PetscBool r1,r2,r3,other_disassembled; 829 MatScalar *val; 830 InsertMode addv = mat->insertmode; 831 PetscMPIInt n; 832 833 PetscFunctionBegin; 834 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 835 if (!baij->donotstash && !mat->nooffprocentries) { 836 while (1) { 837 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 838 if (!flg) break; 839 840 for (i=0; i<n;) { 841 /* Now identify the consecutive vals belonging to the same row */ 842 for (j=i,rstart=row[j]; j<n; j++) { 843 if (row[j] != rstart) break; 844 } 845 if (j < n) ncols = j-i; 846 else ncols = n-i; 847 /* Now assemble all these values with a single function call */ 848 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 849 i = j; 850 } 851 } 852 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 853 /* Now process the block-stash. 
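       Each stashed entry here is a full bs x bs block whose scalars were
       packed column by column; for bs = 2, for example, a block
          [a11 a12]
          [a21 a22]
       corresponds to the sequence a11, a21, a12, a22.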
Since the values are stashed column-oriented, 854 set the roworiented flag to column oriented, and after MatSetValues() 855 restore the original flags */ 856 r1 = baij->roworiented; 857 r2 = a->roworiented; 858 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 859 860 baij->roworiented = PETSC_FALSE; 861 a->roworiented = PETSC_FALSE; 862 863 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 864 while (1) { 865 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 866 if (!flg) break; 867 868 for (i=0; i<n;) { 869 /* Now identify the consecutive vals belonging to the same row */ 870 for (j=i,rstart=row[j]; j<n; j++) { 871 if (row[j] != rstart) break; 872 } 873 if (j < n) ncols = j-i; 874 else ncols = n-i; 875 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 880 881 baij->roworiented = r1; 882 a->roworiented = r2; 883 884 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 885 } 886 887 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 888 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 889 890 /* determine if any processor has disassembled, if so we must 891 also disassemble ourselfs, in order that we may reassemble. */ 892 /* 893 if nonzero structure of submatrix B cannot change then we know that 894 no processor disassembled thus we can skip this stuff 895 */ 896 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 897 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 898 if (mat->was_assembled && !other_disassembled) { 899 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 900 } 901 } 902 903 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 904 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 905 } 906 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 907 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 908 909 #if defined(PETSC_USE_INFO) 910 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 911 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 912 913 baij->ht_total_ct = 0; 914 baij->ht_insert_ct = 0; 915 } 916 #endif 917 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 918 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 919 920 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 921 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 922 } 923 924 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 925 926 baij->rowvalues = 0; 927 928 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 929 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 930 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 931 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 937 #include <petscdraw.h> 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 941 { 942 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 943 PetscErrorCode ierr; 944 PetscMPIInt rank 
= baij->rank; 945 PetscInt bs = mat->rmap->bs; 946 PetscBool iascii,isdraw; 947 PetscViewer sviewer; 948 PetscViewerFormat format; 949 950 PetscFunctionBegin; 951 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 953 if (iascii) { 954 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 955 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 956 MatInfo info; 957 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 958 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 961 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 969 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } else if (format == PETSC_VIEWER_ASCII_INFO) { 972 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 975 PetscFunctionReturn(0); 976 } 977 } 978 979 if (isdraw) { 980 PetscDraw draw; 981 PetscBool isnull; 982 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 983 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 984 } 985 986 { 987 /* assemble the entire matrix onto first processor. */ 988 Mat A; 989 Mat_SeqBAIJ *Aloc; 990 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 991 MatScalar *a; 992 993 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 994 /* Perhaps this should be the type of mat? 
*/ 995 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 998 } else { 999 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1000 } 1001 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1002 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1003 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1004 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1005 1006 /* copy over the A part */ 1007 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1008 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1009 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1010 1011 for (i=0; i<mbs; i++) { 1012 rvals[0] = bs*(baij->rstartbs + i); 1013 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1014 for (j=ai[i]; j<ai[i+1]; j++) { 1015 col = (baij->cstartbs+aj[j])*bs; 1016 for (k=0; k<bs; k++) { 1017 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1018 col++; a += bs; 1019 } 1020 } 1021 } 1022 /* copy over the B part */ 1023 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1024 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = baij->garray[aj[j]]*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 ierr = PetscFree(rvals);CHKERRQ(ierr); 1037 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 /* 1040 Everyone has to call to draw the matrix since the graphics waits are 1041 synchronized across all processors that share the PetscDraw object 1042 */ 1043 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1044 if (!rank) { 1045 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1046 } 1047 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A);CHKERRQ(ierr); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1056 { 1057 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1058 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1060 PetscErrorCode ierr; 1061 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1062 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1063 int fd; 1064 PetscScalar *column_values; 1065 FILE *file; 1066 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1067 PetscInt message_count,flowcontrolcount; 1068 1069 PetscFunctionBegin; 1070 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1071 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1072 nz = bs2*(A->nz + B->nz); 1073 rlen = mat->rmap->n; 1074 if (!rank) { 1075 header[0] = MAT_FILE_CLASSID; 1076 header[1] = mat->rmap->N; 1077 header[2] = mat->cmap->N; 1078 1079 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1080 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1081 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1082 /* get largest number of rows any 
processor has */ 1083 range = mat->rmap->range; 1084 for (i=1; i<size; i++) { 1085 rlen = PetscMax(rlen,range[i+1] - range[i]); 1086 } 1087 } else { 1088 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1089 } 1090 1091 ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1092 /* compute lengths of each row */ 1093 for (i=0; i<a->mbs; i++) { 1094 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1095 } 1096 /* store the row lengths to the file */ 1097 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1098 if (!rank) { 1099 MPI_Status status; 1100 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1101 rlen = (range[1] - range[0])/bs; 1102 for (i=0; i<rlen; i++) { 1103 for (j=0; j<bs; j++) { 1104 row_lens[i*bs+j] = bs*crow_lens[i]; 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1108 for (i=1; i<size; i++) { 1109 rlen = (range[i+1] - range[i])/bs; 1110 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1111 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1112 for (k=0; k<rlen; k++) { 1113 for (j=0; j<bs; j++) { 1114 row_lens[k*bs+j] = bs*crow_lens[k]; 1115 } 1116 } 1117 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1118 } 1119 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1120 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1121 } else { 1122 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1123 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1124 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1127 1128 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1129 information needed to make it for each row from a block row. 
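      (For instance, with bs = 2 a block row containing 3 nonzero blocks is
      written as 2 point rows of 3*2 = 6 column indices each, instead of a
      single block row of 3 block-column indices.)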
This does require more communication but still not more than 1130 the communication needed for the nonzero values */ 1131 nzmax = nz; /* space a largest processor needs */ 1132 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1133 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1134 cnt = 0; 1135 for (i=0; i<a->mbs; i++) { 1136 pcnt = cnt; 1137 for (j=B->i[i]; j<B->i[i+1]; j++) { 1138 if ((col = garray[B->j[j]]) > cstart) break; 1139 for (l=0; l<bs; l++) { 1140 column_indices[cnt++] = bs*col+l; 1141 } 1142 } 1143 for (k=A->i[i]; k<A->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1146 } 1147 } 1148 for (; j<B->i[i+1]; j++) { 1149 for (l=0; l<bs; l++) { 1150 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1151 } 1152 } 1153 len = cnt - pcnt; 1154 for (k=1; k<bs; k++) { 1155 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1156 cnt += len; 1157 } 1158 } 1159 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1160 1161 /* store the columns to the file */ 1162 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1163 if (!rank) { 1164 MPI_Status status; 1165 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 for (i=1; i<size; i++) { 1167 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1168 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1169 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1170 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1171 } 1172 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1173 } else { 1174 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1175 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1176 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1177 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1178 } 1179 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1180 1181 /* load up the numerical values */ 1182 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<a->mbs; i++) { 1185 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1186 for (j=B->i[i]; j<B->i[i+1]; j++) { 1187 if (garray[B->j[j]] > cstart) break; 1188 for (l=0; l<bs; l++) { 1189 for (ll=0; ll<bs; ll++) { 1190 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1191 } 1192 } 1193 cnt += bs; 1194 } 1195 for (k=A->i[i]; k<A->i[i+1]; k++) { 1196 for (l=0; l<bs; l++) { 1197 for (ll=0; ll<bs; ll++) { 1198 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1199 } 1200 } 1201 cnt += bs; 1202 } 1203 for (; j<B->i[i+1]; j++) { 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 cnt += (bs-1)*rlen; 1212 } 1213 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1214 1215 /* store the column values to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1225 } 1226 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1227 } else { 1228 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1229 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1232 } 1233 ierr = PetscFree(column_values);CHKERRQ(ierr); 1234 1235 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1236 if (file) { 1237 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatView_MPIBAIJ" 1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1245 { 1246 PetscErrorCode ierr; 1247 PetscBool iascii,isdraw,issocket,isbinary; 1248 1249 PetscFunctionBegin; 1250 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1254 if (iascii || isdraw || issocket) { 1255 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1256 } else if (isbinary) { 1257 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1265 { 1266 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 #if defined(PETSC_USE_LOG) 1271 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1272 #endif 1273 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1274 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1275 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1277 #if defined(PETSC_USE_CTABLE) 1278 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1279 #else 1280 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1283 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1284 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1285 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1286 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1287 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1288 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1289 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1290 1291 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1293 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatMult_MPIBAIJ" 1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1307 { 1308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 PetscInt nt; 1311 1312 PetscFunctionBegin; 1313 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1314 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1315 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1316 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1317 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1319 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1320 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1342 { 1343 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 PetscBool merged; 1346 1347 PetscFunctionBegin; 1348 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1349 /* do nondiagonal part */ 1350 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1351 if (!merged) { 1352 /* send it on its way */ 1353 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354 /* do local part */ 1355 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1356 /* receive remote parts: note this assumes the values are not actually */ 1357 /* inserted in yy until the next line */ 1358 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1359 } else { 1360 /* do local part */ 1361 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1362 /* send it on its way */ 1363 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1364 /* values actually were received in the Begin() 
but we need to call this nop */ 1365 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1373 { 1374 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 /* do nondiagonal part */ 1379 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 /* do local part */ 1383 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1384 /* receive remote parts: note this assumes the values are not actually */ 1385 /* inserted in yy until the next line, which is true for my implementation*/ 1386 /* but is not perhaps always true. */ 1387 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 /* 1392 This only works correctly for square matrices where the subblock A->A is the 1393 diagonal block 1394 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1404 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatScale_MPIBAIJ" 1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1417 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1424 { 1425 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1426 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1427 PetscErrorCode ierr; 1428 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1429 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1430 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1431 1432 PetscFunctionBegin; 1433 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1434 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1435 mat->getrowactive = PETSC_TRUE; 1436 1437 if (!mat->rowvalues && (idx || v)) { 1438 /* 1439 allocate enough space to hold information from the longest row. 
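        Here "longest" is counted in blocks: max below is the largest number
        of nonzero blocks in any block row of A and B combined, so a single
        point row contains at most max*bs entries, and the max*bs2 values and
        indices allocated below are more than enough (e.g. max = 5, bs = 3
        gives at most 15 entries per point row).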
1440 */ 1441 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1442 PetscInt max = 1,mbs = mat->mbs,tmp; 1443 for (i=0; i<mbs; i++) { 1444 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1445 if (max < tmp) max = tmp; 1446 } 1447 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1448 } 1449 lrow = row - brstart; 1450 1451 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1452 if (!v) {pvA = 0; pvB = 0;} 1453 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1454 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1456 nztot = nzA + nzB; 1457 1458 cmap = mat->garray; 1459 if (v || idx) { 1460 if (nztot) { 1461 /* Sort by increasing column numbers, assuming A and B already sorted */ 1462 PetscInt imark = -1; 1463 if (v) { 1464 *v = v_p = mat->rowvalues; 1465 for (i=0; i<nzB; i++) { 1466 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1467 else break; 1468 } 1469 imark = i; 1470 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1471 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1472 } 1473 if (idx) { 1474 *idx = idx_p = mat->rowindices; 1475 if (imark > -1) { 1476 for (i=0; i<imark; i++) { 1477 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1478 } 1479 } else { 1480 for (i=0; i<nzB; i++) { 1481 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1482 else break; 1483 } 1484 imark = i; 1485 } 1486 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1487 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1488 } 1489 } else { 1490 if (idx) *idx = 0; 1491 if (v) *v = 0; 1492 } 1493 } 1494 *nz = nztot; 1495 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1503 { 1504 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1505 1506 PetscFunctionBegin; 1507 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1508 baij->getrowactive = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1515 { 1516 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1521 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1528 { 1529 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1530 Mat A = a->A,B = a->B; 1531 PetscErrorCode ierr; 1532 PetscReal isend[5],irecv[5]; 1533 1534 PetscFunctionBegin; 1535 info->block_size = (PetscReal)matin->rmap->bs; 1536 1537 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1538 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 1542 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1543 1544 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1545 isend[3] += info->memory; 
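  /* isend[] collects {nz_used, nz_allocated, nz_unneeded, memory, mallocs} summed over the
     diagonal (A) and off-diagonal (B) parts; it is used directly for MAT_LOCAL and reduced
     across processes for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM below */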
isend[4] += info->mallocs; 1546 1547 if (flag == MAT_LOCAL) { 1548 info->nz_used = isend[0]; 1549 info->nz_allocated = isend[1]; 1550 info->nz_unneeded = isend[2]; 1551 info->memory = isend[3]; 1552 info->mallocs = isend[4]; 1553 } else if (flag == MAT_GLOBAL_MAX) { 1554 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1555 1556 info->nz_used = irecv[0]; 1557 info->nz_allocated = irecv[1]; 1558 info->nz_unneeded = irecv[2]; 1559 info->memory = irecv[3]; 1560 info->mallocs = irecv[4]; 1561 } else if (flag == MAT_GLOBAL_SUM) { 1562 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1563 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1570 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1571 info->fill_ratio_needed = 0; 1572 info->factor_mallocs = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1579 { 1580 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1581 PetscErrorCode ierr; 1582 1583 PetscFunctionBegin; 1584 switch (op) { 1585 case MAT_NEW_NONZERO_LOCATIONS: 1586 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1587 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1588 case MAT_KEEP_NONZERO_PATTERN: 1589 case MAT_NEW_NONZERO_LOCATION_ERR: 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_ROW_ORIENTED: 1594 a->roworiented = flg; 1595 1596 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1597 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1598 break; 1599 case MAT_NEW_DIAGONALS: 1600 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1601 break; 1602 case MAT_IGNORE_OFF_PROC_ENTRIES: 1603 a->donotstash = flg; 1604 break; 1605 case MAT_USE_HASH_TABLE: 1606 a->ht_flag = flg; 1607 break; 1608 case MAT_SYMMETRIC: 1609 case MAT_STRUCTURALLY_SYMMETRIC: 1610 case MAT_HERMITIAN: 1611 case MAT_SYMMETRY_ETERNAL: 1612 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1613 break; 1614 default: 1615 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1623 { 1624 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1625 Mat_SeqBAIJ *Aloc; 1626 Mat B; 1627 PetscErrorCode ierr; 1628 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1629 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1630 MatScalar *a; 1631 1632 PetscFunctionBegin; 1633 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1634 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1635 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1636 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1637 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1638 /* Do not know preallocation information, but must set block size */ 1639 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1640 } else { 
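    /* MAT_REUSE_MATRIX with an output matrix distinct from A: reuse the previously created transpose */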
1641 B = *matout; 1642 } 1643 1644 /* copy over the A part */ 1645 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1646 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1647 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1648 1649 for (i=0; i<mbs; i++) { 1650 rvals[0] = bs*(baij->rstartbs + i); 1651 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1652 for (j=ai[i]; j<ai[i+1]; j++) { 1653 col = (baij->cstartbs+aj[j])*bs; 1654 for (k=0; k<bs; k++) { 1655 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1656 1657 col++; a += bs; 1658 } 1659 } 1660 } 1661 /* copy over the B part */ 1662 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1663 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1664 for (i=0; i<mbs; i++) { 1665 rvals[0] = bs*(baij->rstartbs + i); 1666 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1667 for (j=ai[i]; j<ai[i+1]; j++) { 1668 col = baij->garray[aj[j]]*bs; 1669 for (k=0; k<bs; k++) { 1670 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1671 col++; 1672 a += bs; 1673 } 1674 } 1675 } 1676 ierr = PetscFree(rvals);CHKERRQ(ierr); 1677 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1678 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 1680 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1681 else { 1682 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1690 { 1691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1692 Mat a = baij->A,b = baij->B; 1693 PetscErrorCode ierr; 1694 PetscInt s1,s2,s3; 1695 1696 PetscFunctionBegin; 1697 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1698 if (rr) { 1699 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1700 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1701 /* Overlap communication with computation. 
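       The forward scatter of rr into the ghosted work vector lvec is only started here; the diagonal block is scaled while the scatter is in flight, and the off-diagonal block is column-scaled after the scatter completes below.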
*/ 1702 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1703 } 1704 if (ll) { 1705 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1706 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1707 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1708 } 1709 /* scale the diagonal block */ 1710 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1711 1712 if (rr) { 1713 /* Do a scatter end and then right scale the off-diagonal block */ 1714 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1723 { 1724 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1725 PetscInt *owners = A->rmap->range; 1726 PetscInt n = A->rmap->n; 1727 PetscSF sf; 1728 PetscInt *lrows; 1729 PetscSFNode *rrows; 1730 PetscInt r, p = 0, len = 0; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 /* Create SF where leaves are input rows and roots are owned rows */ 1735 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1736 for (r = 0; r < n; ++r) lrows[r] = -1; 1737 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 1738 for (r = 0; r < N; ++r) { 1739 const PetscInt idx = rows[r]; 1740 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1741 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1742 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1743 } 1744 if (A->nooffproczerorows) { 1745 if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1746 lrows[len++] = idx - owners[p]; 1747 } else { 1748 rrows[r].rank = p; 1749 rrows[r].index = rows[r] - owners[p]; 1750 } 1751 } 1752 if (!A->nooffproczerorows) { 1753 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1754 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1755 /* Collect flags for rows to be zeroed */ 1756 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1757 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1758 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1759 /* Compress and put in row numbers */ 1760 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1761 } 1762 /* fix right hand side if needed */ 1763 if (x && b) { 1764 const PetscScalar *xx; 1765 PetscScalar *bb; 1766 1767 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1768 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1769 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1770 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1771 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1772 } 1773 1774 /* actually zap the local rows */ 1775 /* 1776 Zero the required rows. If the "diagonal block" of the matrix 1777 is square and the user wishes to set the diagonal we use separate 1778 code so that MatSetValues() is not called for each diagonal allocating 1779 new memory, thus calling lots of mallocs and slowing things down. 
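       When the diagonal block is rectangular and diag is nonzero, the rows are zeroed first and the diagonal entries are then inserted one at a time with MatSetValues() below, which requires that new nonzero locations be allowed.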
1780 1781 */ 1782 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1783 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1784 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1785 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1786 } else if (diag != 0.0) { 1787 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1788 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1789 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1790 for (r = 0; r < len; ++r) { 1791 const PetscInt row = lrows[r] + A->rmap->rstart; 1792 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 } else { 1797 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1798 } 1799 ierr = PetscFree(lrows);CHKERRQ(ierr); 1800 1801 /* only change matrix nonzero state if pattern was allowed to be changed */ 1802 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1803 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1804 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1805 } 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1811 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1812 { 1813 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1814 PetscErrorCode ierr; 1815 PetscMPIInt n = A->rmap->n; 1816 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1817 PetscInt *lrows,*owners = A->rmap->range; 1818 PetscSFNode *rrows; 1819 PetscSF sf; 1820 const PetscScalar *xx; 1821 PetscScalar *bb,*mask; 1822 Vec xmask,lmask; 1823 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1824 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1825 PetscScalar *aa; 1826 1827 PetscFunctionBegin; 1828 /* Create SF where leaves are input rows and roots are owned rows */ 1829 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1830 for (r = 0; r < n; ++r) lrows[r] = -1; 1831 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1832 for (r = 0; r < N; ++r) { 1833 const PetscInt idx = rows[r]; 1834 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1835 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1836 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1837 } 1838 rrows[r].rank = p; 1839 rrows[r].index = rows[r] - owners[p]; 1840 } 1841 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1842 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1843 /* Collect flags for rows to be zeroed */ 1844 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1845 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1846 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1847 /* Compress and put in row numbers */ 1848 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1849 /* zero diagonal part of matrix */ 1850 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1851 /* 
handle off diagonal part of matrix */ 1852 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1853 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1854 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1855 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1856 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1860 if (x) { 1861 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1863 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1864 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1865 } 1866 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1867 /* remove zeroed rows of off diagonal matrix */ 1868 for (i = 0; i < len; ++i) { 1869 row = lrows[i]; 1870 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1871 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1872 for (k = 0; k < count; ++k) { 1873 aa[0] = 0.0; 1874 aa += bs; 1875 } 1876 } 1877 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1878 for (i = 0; i < l->B->rmap->N; ++i) { 1879 row = i/bs; 1880 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1881 for (k = 0; k < bs; ++k) { 1882 col = bs*baij->j[j] + k; 1883 if (PetscAbsScalar(mask[col])) { 1884 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1885 if (b) bb[i] -= aa[0]*xx[col]; 1886 aa[0] = 0.0; 1887 } 1888 } 1889 } 1890 } 1891 if (x) { 1892 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1893 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1894 } 1895 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1896 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1897 ierr = PetscFree(lrows);CHKERRQ(ierr); 1898 1899 /* only change matrix nonzero state if pattern was allowed to be changed */ 1900 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1901 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1902 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1909 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1910 { 1911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatEqual_MPIBAIJ" 1923 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1924 { 1925 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1926 Mat a,b,c,d; 1927 PetscBool flg; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 a = matA->A; b = matA->B; 1932 c = matB->A; d = matB->B; 1933 1934 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1935 if (flg) { 1936 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1937 } 1938 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatCopy_MPIBAIJ" 1944 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1945 { 1946 PetscErrorCode ierr; 1947 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 
1948 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1949 1950 PetscFunctionBegin; 1951 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1952 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1953 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1954 } else { 1955 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1956 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1963 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1964 { 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1974 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1975 { 1976 PetscErrorCode ierr; 1977 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1978 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1979 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1980 1981 PetscFunctionBegin; 1982 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1988 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1989 { 1990 PetscErrorCode ierr; 1991 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1992 PetscBLASInt bnz,one=1; 1993 Mat_SeqBAIJ *x,*y; 1994 1995 PetscFunctionBegin; 1996 if (str == SAME_NONZERO_PATTERN) { 1997 PetscScalar alpha = a; 1998 x = (Mat_SeqBAIJ*)xx->A->data; 1999 y = (Mat_SeqBAIJ*)yy->A->data; 2000 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2001 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2002 x = (Mat_SeqBAIJ*)xx->B->data; 2003 y = (Mat_SeqBAIJ*)yy->B->data; 2004 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2005 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2006 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2007 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2008 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2009 } else { 2010 Mat B; 2011 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2012 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2013 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2014 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2015 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2016 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2017 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2018 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2019 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2020 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2021 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2022 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
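       Exact block preallocation for the union of the two nonzero patterns is computed above (nnz_d/nnz_o), so the insertion pass below should not trigger additional mallocs.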
*/ 2023 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2024 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2025 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2026 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2033 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2034 { 2035 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2040 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2046 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2047 { 2048 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2053 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2059 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2060 { 2061 PetscErrorCode ierr; 2062 IS iscol_local; 2063 PetscInt csize; 2064 2065 PetscFunctionBegin; 2066 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2067 if (call == MAT_REUSE_MATRIX) { 2068 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2069 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2070 } else { 2071 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2072 } 2073 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2074 if (call == MAT_INITIAL_MATRIX) { 2075 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2076 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2083 /* 2084 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2085 in local and then by concatenating the local matrices the end result. 2086 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2087 */ 2088 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2089 { 2090 PetscErrorCode ierr; 2091 PetscMPIInt rank,size; 2092 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2093 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2094 Mat M,Mreuse; 2095 MatScalar *vwork,*aa; 2096 MPI_Comm comm; 2097 IS isrow_new, iscol_new; 2098 PetscBool idflag,allrows, allcols; 2099 Mat_SeqBAIJ *aij; 2100 2101 PetscFunctionBegin; 2102 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 /* The compression and expansion should be avoided. 
Doesn't point 2106 out errors, might change the indices, hence buggey */ 2107 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2108 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2109 2110 /* Check for special case: each processor gets entire matrix columns */ 2111 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2112 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2113 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2114 else allcols = PETSC_FALSE; 2115 2116 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2117 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2118 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2119 else allrows = PETSC_FALSE; 2120 2121 if (call == MAT_REUSE_MATRIX) { 2122 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2123 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2124 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2125 } else { 2126 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2127 } 2128 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2129 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2130 /* 2131 m - number of local rows 2132 n - number of columns (same on all processors) 2133 rstart - first row in new global matrix generated 2134 */ 2135 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2136 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2137 m = m/bs; 2138 n = n/bs; 2139 2140 if (call == MAT_INITIAL_MATRIX) { 2141 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2142 ii = aij->i; 2143 jj = aij->j; 2144 2145 /* 2146 Determine the number of non-zeros in the diagonal and off-diagonal 2147 portions of the matrix in order to do correct preallocation 2148 */ 2149 2150 /* first get start and end of "diagonal" columns */ 2151 if (csize == PETSC_DECIDE) { 2152 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2153 if (mglobal == n*bs) { /* square matrix */ 2154 nlocal = m; 2155 } else { 2156 nlocal = n/size + ((n % size) > rank); 2157 } 2158 } else { 2159 nlocal = csize/bs; 2160 } 2161 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2162 rstart = rend - nlocal; 2163 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2164 2165 /* next, compute all the lengths */ 2166 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2167 for (i=0; i<m; i++) { 2168 jend = ii[i+1] - ii[i]; 2169 olen = 0; 2170 dlen = 0; 2171 for (j=0; j<jend; j++) { 2172 if (*jj < rstart || *jj >= rend) olen++; 2173 else dlen++; 2174 jj++; 2175 } 2176 olens[i] = olen; 2177 dlens[i] = dlen; 2178 } 2179 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2180 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2181 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2182 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2183 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2184 } else { 2185 PetscInt ml,nl; 2186 2187 M = *newmat; 2188 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2189 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2190 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2191 
/* 2192 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2193 rather than the slower MatSetValues(). 2194 */ 2195 M->was_assembled = PETSC_TRUE; 2196 M->assembled = PETSC_FALSE; 2197 } 2198 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2199 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2200 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2201 ii = aij->i; 2202 jj = aij->j; 2203 aa = aij->a; 2204 for (i=0; i<m; i++) { 2205 row = rstart/bs + i; 2206 nz = ii[i+1] - ii[i]; 2207 cwork = jj; jj += nz; 2208 vwork = aa; aa += nz*bs*bs; 2209 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2210 } 2211 2212 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2213 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2214 *newmat = M; 2215 2216 /* save submatrix used in processor for next request */ 2217 if (call == MAT_INITIAL_MATRIX) { 2218 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2219 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatPermute_MPIBAIJ" 2226 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2227 { 2228 MPI_Comm comm,pcomm; 2229 PetscInt clocal_size,nrows; 2230 const PetscInt *rows; 2231 PetscMPIInt size; 2232 IS crowp,lcolp; 2233 PetscErrorCode ierr; 2234 2235 PetscFunctionBegin; 2236 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2237 /* make a collective version of 'rowp' */ 2238 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2239 if (pcomm==comm) { 2240 crowp = rowp; 2241 } else { 2242 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2243 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2244 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2245 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2246 } 2247 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2248 /* make a local version of 'colp' */ 2249 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2250 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2251 if (size==1) { 2252 lcolp = colp; 2253 } else { 2254 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2255 } 2256 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2257 /* now we just get the submatrix */ 2258 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2259 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2260 /* clean up */ 2261 if (pcomm!=comm) { 2262 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2263 } 2264 if (size>1) { 2265 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2266 } 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2272 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2275 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2276 2277 PetscFunctionBegin; 2278 if (nghosts) *nghosts = B->nbs; 2279 if (ghosts) *ghosts = baij->garray; 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2285 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2286 { 2287 Mat B; 2288 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2289 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2290 Mat_SeqAIJ *b; 2291 PetscErrorCode ierr; 2292 
PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2293 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2294 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2295 2296 PetscFunctionBegin; 2297 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2298 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2299 2300 /* ---------------------------------------------------------------- 2301 Tell every processor the number of nonzeros per row 2302 */ 2303 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2304 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2305 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2306 } 2307 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2308 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2309 displs = recvcounts + size; 2310 for (i=0; i<size; i++) { 2311 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2312 displs[i] = A->rmap->range[i]/bs; 2313 } 2314 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2315 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2316 #else 2317 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2318 #endif 2319 /* --------------------------------------------------------------- 2320 Create the sequential matrix of the same type as the local block diagonal 2321 */ 2322 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2323 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2324 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2325 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2326 b = (Mat_SeqAIJ*)B->data; 2327 2328 /*-------------------------------------------------------------------- 2329 Copy my part of matrix column indices over 2330 */ 2331 sendcount = ad->nz + bd->nz; 2332 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2333 a_jsendbuf = ad->j; 2334 b_jsendbuf = bd->j; 2335 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2336 cnt = 0; 2337 for (i=0; i<n; i++) { 2338 2339 /* put in lower diagonal portion */ 2340 m = bd->i[i+1] - bd->i[i]; 2341 while (m > 0) { 2342 /* is it above diagonal (in bd (compressed) numbering) */ 2343 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2344 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2345 m--; 2346 } 2347 2348 /* put in diagonal portion */ 2349 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2350 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2351 } 2352 2353 /* put in upper diagonal portion */ 2354 while (m-- > 0) { 2355 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2356 } 2357 } 2358 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2359 2360 /*-------------------------------------------------------------------- 2361 Gather all column indices to all processors 2362 */ 2363 for (i=0; i<size; i++) { 2364 recvcounts[i] = 0; 2365 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2366 recvcounts[i] += lens[j]; 2367 } 2368 } 2369 displs[0] = 0; 2370 for (i=1; i<size; i++) { 2371 displs[i] = displs[i-1] + recvcounts[i-1]; 2372 } 2373 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2374 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2375 #else 2376 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2377 #endif 2378 /*-------------------------------------------------------------------- 2379 Assemble the matrix into useable form (note numerical values not yet set) 2380 */ 2381 /* set the b->ilen (length of each row) values */ 2382 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2383 /* set the b->i indices */ 2384 b->i[0] = 0; 2385 for (i=1; i<=A->rmap->N/bs; i++) { 2386 b->i[i] = b->i[i-1] + lens[i-1]; 2387 } 2388 ierr = PetscFree(lens);CHKERRQ(ierr); 2389 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2392 2393 if (A->symmetric) { 2394 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2395 } else if (A->hermitian) { 2396 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2397 } else if (A->structurally_symmetric) { 2398 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2399 } 2400 *newmat = B; 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatSOR_MPIBAIJ" 2406 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2407 { 2408 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2409 PetscErrorCode ierr; 2410 Vec bb1 = 0; 2411 2412 PetscFunctionBegin; 2413 if (flag == SOR_APPLY_UPPER) { 2414 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2419 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2420 } 2421 2422 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2423 if (flag & SOR_ZERO_INITIAL_GUESS) { 2424 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2425 its--; 2426 } 2427 2428 while (its--) { 2429 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2431 2432 /* update rhs: bb1 = bb - B*x */ 2433 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2434 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2435 2436 /* local sweep */ 2437 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2438 } 2439 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2440 if (flag & SOR_ZERO_INITIAL_GUESS) { 2441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2442 its--; 2443 } 2444 while (its--) { 2445 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2446 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2447 2448 /* update rhs: bb1 = bb - B*x */ 2449 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2450 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2451 2452 /* local sweep */ 2453 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2454 } 2455 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2456 if (flag & SOR_ZERO_INITIAL_GUESS) { 2457 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2458 its--; 2459 } 2460 while (its--) { 2461 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2463 2464 /* update rhs: bb1 = bb - B*x */ 2465 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2466 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2467 2468 /* local sweep */ 2469 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2470 } 2471 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2472 2473 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2479 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2483 PetscInt N,i,*garray = aij->garray; 2484 PetscInt ib,jb,bs = A->rmap->bs; 2485 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2486 MatScalar *a_val = a_aij->a; 2487 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2488 MatScalar *b_val = b_aij->a; 2489 PetscReal *work; 2490 2491 PetscFunctionBegin; 2492 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2493 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2494 if (type == NORM_2) { 2495 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2496 for (jb=0; jb<bs; jb++) { 2497 for (ib=0; ib<bs; ib++) { 2498 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2499 a_val++; 2500 } 2501 } 2502 } 2503 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2504 for (jb=0; jb<bs; jb++) { 2505 for (ib=0; ib<bs; ib++) { 2506 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2507 b_val++; 2508 } 2509 } 2510 } 2511 } else if (type == NORM_1) { 2512 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2513 for (jb=0; jb<bs; jb++) { 2514 for (ib=0; ib<bs; ib++) { 2515 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2516 a_val++; 2517 } 2518 } 2519 } 2520 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2521 for (jb=0; jb<bs; jb++) { 2522 for (ib=0; ib<bs; ib++) { 2523 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2524 b_val++; 2525 } 2526 } 2527 } 2528 } else if (type == NORM_INFINITY) { 2529 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2530 for (jb=0; jb<bs; jb++) { 2531 for (ib=0; ib<bs; ib++) { 2532 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2533 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2534 a_val++; 2535 } 2536 } 2537 } 2538 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2539 for (jb=0; jb<bs; jb++) { 2540 for (ib=0; ib<bs; ib++) { 2541 int col = garray[b_aij->j[i]] * bs + jb; 2542 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2543 b_val++; 2544 } 2545 } 2546 } 2547 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2548 if (type == NORM_INFINITY) { 2549 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2550 } else { 2551 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(work);CHKERRQ(ierr); 2554 if (type == NORM_2) { 2555 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2562 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2563 { 2564 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 2573 /* -------------------------------------------------------------------*/ 2574 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2575 MatGetRow_MPIBAIJ, 2576 MatRestoreRow_MPIBAIJ, 2577 MatMult_MPIBAIJ, 2578 /* 4*/ MatMultAdd_MPIBAIJ, 2579 MatMultTranspose_MPIBAIJ, 2580 MatMultTransposeAdd_MPIBAIJ, 2581 0, 2582 0, 2583 0, 2584 /*10*/ 0, 2585 0, 2586 0, 2587 MatSOR_MPIBAIJ, 2588 MatTranspose_MPIBAIJ, 2589 /*15*/ MatGetInfo_MPIBAIJ, 2590 MatEqual_MPIBAIJ, 2591 MatGetDiagonal_MPIBAIJ, 2592 MatDiagonalScale_MPIBAIJ, 2593 MatNorm_MPIBAIJ, 2594 /*20*/ MatAssemblyBegin_MPIBAIJ, 2595 MatAssemblyEnd_MPIBAIJ, 2596 MatSetOption_MPIBAIJ, 2597 MatZeroEntries_MPIBAIJ, 2598 /*24*/ MatZeroRows_MPIBAIJ, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*29*/ MatSetUp_MPIBAIJ, 2604 0, 2605 0, 2606 0, 2607 0, 2608 /*34*/ MatDuplicate_MPIBAIJ, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*39*/ MatAXPY_MPIBAIJ, 2614 MatGetSubMatrices_MPIBAIJ, 2615 MatIncreaseOverlap_MPIBAIJ, 2616 MatGetValues_MPIBAIJ, 2617 MatCopy_MPIBAIJ, 2618 /*44*/ 0, 2619 MatScale_MPIBAIJ, 2620 0, 2621 0, 2622 MatZeroRowsColumns_MPIBAIJ, 2623 /*49*/ 0, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*54*/ MatFDColoringCreate_MPIXAIJ, 2629 0, 2630 MatSetUnfactored_MPIBAIJ, 2631 MatPermute_MPIBAIJ, 2632 MatSetValuesBlocked_MPIBAIJ, 2633 /*59*/ MatGetSubMatrix_MPIBAIJ, 2634 MatDestroy_MPIBAIJ, 2635 MatView_MPIBAIJ, 2636 0, 2637 0, 2638 /*64*/ 0, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*74*/ 0, 2649 MatFDColoringApply_BAIJ, 2650 0, 2651 0, 2652 0, 2653 /*79*/ 0, 2654 0, 2655 0, 2656 0, 2657 MatLoad_MPIBAIJ, 2658 /*84*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*89*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /*94*/ 0, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*99*/ 0, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*104*/0, 2679 MatRealPart_MPIBAIJ, 2680 MatImaginaryPart_MPIBAIJ, 2681 0, 2682 0, 2683 /*109*/0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2689 0, 2690 MatGetGhosts_MPIBAIJ, 2691 0, 2692 0, 2693 /*119*/0, 2694 0, 2695 0, 2696 0, 2697 MatGetMultiProcBlock_MPIBAIJ, 2698 /*124*/0, 2699 MatGetColumnNorms_MPIBAIJ, 2700 MatInvertBlockDiagonal_MPIBAIJ, 2701 0, 2702 0, 2703 /*129*/ 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /*134*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /*139*/ 0, 2714 0, 2715 0, 2716 MatFDColoringSetUp_MPIXAIJ, 2717 0, 2718 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2719 }; 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2723 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2724 { 2725 PetscFunctionBegin; 2726 *a = ((Mat_MPIBAIJ*)A->data)->A; 2727 PetscFunctionReturn(0); 2728 } 2729 2730 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2734 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2735 { 2736 PetscInt m,rstart,cstart,cend; 2737 PetscInt 
i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2738 const PetscInt *JJ =0; 2739 PetscScalar *values=0; 2740 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2745 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2746 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2747 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2748 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2749 m = B->rmap->n/bs; 2750 rstart = B->rmap->rstart/bs; 2751 cstart = B->cmap->rstart/bs; 2752 cend = B->cmap->rend/bs; 2753 2754 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2755 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2756 for (i=0; i<m; i++) { 2757 nz = ii[i+1] - ii[i]; 2758 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2759 nz_max = PetscMax(nz_max,nz); 2760 JJ = jj + ii[i]; 2761 for (j=0; j<nz; j++) { 2762 if (*JJ >= cstart) break; 2763 JJ++; 2764 } 2765 d = 0; 2766 for (; j<nz; j++) { 2767 if (*JJ++ >= cend) break; 2768 d++; 2769 } 2770 d_nnz[i] = d; 2771 o_nnz[i] = nz - d; 2772 } 2773 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2774 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2775 2776 values = (PetscScalar*)V; 2777 if (!values) { 2778 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2779 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2780 } 2781 for (i=0; i<m; i++) { 2782 PetscInt row = i + rstart; 2783 PetscInt ncols = ii[i+1] - ii[i]; 2784 const PetscInt *icols = jj + ii[i]; 2785 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2786 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2787 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2788 } else { /* block ordering does not match so we can only insert one block at a time. */ 2789 PetscInt j; 2790 for (j=0; j<ncols; j++) { 2791 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2792 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 } 2795 } 2796 2797 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2798 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2800 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2806 /*@C 2807 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2808 (the default parallel PETSc format). 2809 2810 Collective on MPI_Comm 2811 2812 Input Parameters: 2813 + B - the matrix 2814 . bs - the block size 2815 . i - the indices into j for the start of each local row (starts with zero) 2816 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2817 - v - optional values in the matrix 2818 2819 Level: developer 2820 2821 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 2822 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2823 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2824 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2825 block column and the second index is over columns within a block. 2826 2827 .keywords: matrix, aij, compressed row, sparse, parallel 2828 2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2830 @*/ 2831 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2832 { 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2837 PetscValidType(B,1); 2838 PetscValidLogicalCollectiveInt(B,bs,2); 2839 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2845 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2846 { 2847 Mat_MPIBAIJ *b; 2848 PetscErrorCode ierr; 2849 PetscInt i; 2850 2851 PetscFunctionBegin; 2852 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2853 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2854 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2856 2857 if (d_nnz) { 2858 for (i=0; i<B->rmap->n/bs; i++) { 2859 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2860 } 2861 } 2862 if (o_nnz) { 2863 for (i=0; i<B->rmap->n/bs; i++) { 2864 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2865 } 2866 } 2867 2868 b = (Mat_MPIBAIJ*)B->data; 2869 b->bs2 = bs*bs; 2870 b->mbs = B->rmap->n/bs; 2871 b->nbs = B->cmap->n/bs; 2872 b->Mbs = B->rmap->N/bs; 2873 b->Nbs = B->cmap->N/bs; 2874 2875 for (i=0; i<=b->size; i++) { 2876 b->rangebs[i] = B->rmap->range[i]/bs; 2877 } 2878 b->rstartbs = B->rmap->rstart/bs; 2879 b->rendbs = B->rmap->rend/bs; 2880 b->cstartbs = B->cmap->rstart/bs; 2881 b->cendbs = B->cmap->rend/bs; 2882 2883 if (!B->preallocated) { 2884 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2885 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2886 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2888 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2889 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2890 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2891 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2892 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2893 } 2894 2895 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2896 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2897 B->preallocated = PETSC_TRUE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 extern 
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2902 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2903 2904 #undef __FUNCT__ 2905 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2906 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2907 { 2908 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2909 PetscErrorCode ierr; 2910 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2911 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2912 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2913 2914 PetscFunctionBegin; 2915 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2916 ii[0] = 0; 2917 for (i=0; i<M; i++) { 2918 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2919 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2920 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2921 /* remove one from count of matrix has diagonal */ 2922 for (j=id[i]; j<id[i+1]; j++) { 2923 if (jd[j] == i) {ii[i+1]--;break;} 2924 } 2925 } 2926 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2927 cnt = 0; 2928 for (i=0; i<M; i++) { 2929 for (j=io[i]; j<io[i+1]; j++) { 2930 if (garray[jo[j]] > rstart) break; 2931 jj[cnt++] = garray[jo[j]]; 2932 } 2933 for (k=id[i]; k<id[i+1]; k++) { 2934 if (jd[k] != i) { 2935 jj[cnt++] = rstart + jd[k]; 2936 } 2937 } 2938 for (; j<io[i+1]; j++) { 2939 jj[cnt++] = garray[jo[j]]; 2940 } 2941 } 2942 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2947 2948 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2952 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2953 { 2954 PetscErrorCode ierr; 2955 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2956 Mat B; 2957 Mat_MPIAIJ *b; 2958 2959 PetscFunctionBegin; 2960 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2961 2962 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2963 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2964 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2965 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2966 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2967 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2968 b = (Mat_MPIAIJ*) B->data; 2969 2970 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2971 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2972 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2973 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2974 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2979 if (reuse == MAT_REUSE_MATRIX) { 2980 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2981 } else { 2982 *newmat = B; 2983 } 2984 PetscFunctionReturn(0); 2985 } 
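/*
   Usage sketch (illustrative only; not part of the registered conversion path itself): applications
   normally reach MatConvert_MPIBAIJ_MPIAIJ() through the generic MatConvert() interface on an
   already assembled MATMPIBAIJ matrix A, for example

      Mat Aaij;
      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);CHKERRQ(ierr);
      ierr = MatDestroy(&Aaij);CHKERRQ(ierr);

   Passing MAT_REUSE_MATRIX instead replaces A itself through MatHeaderReplace(), as coded above.
*/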
2986 2987 /*MC 2988 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2989 2990 Options Database Keys: 2991 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2992 . -mat_block_size <bs> - set the blocksize used to store the matrix 2993 - -mat_use_hash_table <fact> 2994 2995 Level: beginner 2996 2997 .seealso: MatCreateMPIBAIJ 2998 M*/ 2999 3000 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3001 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatCreate_MPIBAIJ" 3004 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3005 { 3006 Mat_MPIBAIJ *b; 3007 PetscErrorCode ierr; 3008 PetscBool flg = PETSC_FALSE; 3009 3010 PetscFunctionBegin; 3011 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3012 B->data = (void*)b; 3013 3014 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3015 B->assembled = PETSC_FALSE; 3016 3017 B->insertmode = NOT_SET_VALUES; 3018 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3019 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3020 3021 /* build local table of row and column ownerships */ 3022 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3023 3024 /* build cache for off array entries formed */ 3025 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3026 3027 b->donotstash = PETSC_FALSE; 3028 b->colmap = NULL; 3029 b->garray = NULL; 3030 b->roworiented = PETSC_TRUE; 3031 3032 /* stuff used in block assembly */ 3033 b->barray = 0; 3034 3035 /* stuff used for matrix vector multiply */ 3036 b->lvec = 0; 3037 b->Mvctx = 0; 3038 3039 /* stuff for MatGetRow() */ 3040 b->rowindices = 0; 3041 b->rowvalues = 0; 3042 b->getrowactive = PETSC_FALSE; 3043 3044 /* hash table stuff */ 3045 b->ht = 0; 3046 b->hd = 0; 3047 b->ht_size = 0; 3048 b->ht_flag = PETSC_FALSE; 3049 b->ht_fact = 0; 3050 b->ht_total_ct = 0; 3051 b->ht_insert_ct = 0; 3052 3053 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3054 b->ijonly = PETSC_FALSE; 3055 3056 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3068 ierr = 
PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3069 3070 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3071 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 3072 if (flg) { 3073 PetscReal fact = 1.39; 3074 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3075 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3076 if (fact <= 1.0) fact = 1.39; 3077 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3078 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3079 } 3080 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 /*MC 3085 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3086 3087 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3088 and MATMPIBAIJ otherwise. 3089 3090 Options Database Keys: 3091 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3092 3093 Level: beginner 3094 3095 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3096 M*/ 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3100 /*@C 3101 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3102 (block compressed row). For good matrix assembly performance 3103 the user should preallocate the matrix storage by setting the parameters 3104 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3105 performance can be increased by more than a factor of 50. 3106 3107 Collective on Mat 3108 3109 Input Parameters: 3110 + B - the matrix 3111 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3112 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3113 . d_nz - number of block nonzeros per block row in diagonal portion of local 3114 submatrix (same for all local rows) 3115 . d_nnz - array containing the number of block nonzeros in the various block rows 3116 in the diagonal portion of the local submatrix (possibly different for each block 3117 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3118 set it even if it is zero. 3119 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3120 submatrix (same for all local rows). 3121 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3122 off-diagonal portion of the local submatrix (possibly different for 3123 each block row) or NULL. 3124 3125 If the *_nnz parameter is given then the *_nz parameter is ignored 3126 3127 Options Database Keys: 3128 + -mat_block_size - size of the blocks to use 3129 - -mat_use_hash_table <fact> - use a hash table for faster insertion of values during assembly, with <fact> as the hash table factor 3130 3131 Notes: 3132 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3133 then it must be used on all processors that share the object for that argument.
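   A minimal call sequence (a sketch only; the block size and fill estimates below are illustrative placeholders, not values required by this routine):
.vb
     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(B,2,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
   Here bs is 2, and each block row is given room for 5 blocks in the diagonal portion and 2 blocks in the off-diagonal portion; pass d_nnz/o_nnz arrays instead when the counts vary per block row.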
3134 3135 Storage Information: 3136 For a square global matrix we define each processor's diagonal portion 3137 to be its local rows and the corresponding columns (a square submatrix); 3138 each processor's off-diagonal portion encompasses the remainder of the 3139 local matrix (a rectangular submatrix). 3140 3141 The user can specify preallocated storage for the diagonal part of 3142 the local submatrix with either d_nz or d_nnz (not both). Set 3143 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3144 memory allocation. Likewise, specify preallocated storage for the 3145 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3146 3147 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3148 the figure below we depict these three local rows and all columns (0-11). 3149 3150 .vb 3151 0 1 2 3 4 5 6 7 8 9 10 11 3152 -------------------------- 3153 row 3 |o o o d d d o o o o o o 3154 row 4 |o o o d d d o o o o o o 3155 row 5 |o o o d d d o o o o o o 3156 -------------------------- 3157 .ve 3158 3159 Thus, any entries in the d locations are stored in the d (diagonal) 3160 submatrix, and any entries in the o locations are stored in the 3161 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3162 stored simply in the MATSEQBAIJ format for compressed row storage. 3163 3164 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3165 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3166 In general, for PDE problems in which most nonzeros are near the diagonal, 3167 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3168 or you will get TERRIBLE performance; see the users' manual chapter on 3169 matrices. 3170 3171 You can call MatGetInfo() to get information on how effective the preallocation was; 3172 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3173 You can also run with the option -info and look for messages with the string 3174 malloc in them to see if additional memory allocation was needed. 3175 3176 Level: intermediate 3177 3178 .keywords: matrix, block, aij, compressed row, sparse, parallel 3179 3180 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3181 @*/ 3182 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3183 { 3184 PetscErrorCode ierr; 3185 3186 PetscFunctionBegin; 3187 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3188 PetscValidType(B,1); 3189 PetscValidLogicalCollectiveInt(B,bs,2); 3190 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3191 PetscFunctionReturn(0); 3192 } 3193 3194 #undef __FUNCT__ 3195 #define __FUNCT__ "MatCreateBAIJ" 3196 /*@C 3197 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3198 (block compressed row). For good matrix assembly performance 3199 the user should preallocate the matrix storage by setting the parameters 3200 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3201 performance can be increased by more than a factor of 50. 3202 3203 Collective on MPI_Comm 3204 3205 Input Parameters: 3206 + comm - MPI communicator 3207 . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3208 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3209 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3210 This value should be the same as the local size used in creating the 3211 y vector for the matrix-vector product y = Ax. 3212 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3213 This value should be the same as the local size used in creating the 3214 x vector for the matrix-vector product y = Ax. 3215 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3216 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3217 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3218 submatrix (same for all local rows) 3219 . d_nnz - array containing the number of nonzero blocks in the various block rows 3220 in the diagonal portion of the local submatrix (possibly different for each block 3221 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3222 and set it even if it is zero. 3223 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3224 submatrix (same for all local rows). 3225 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3226 off-diagonal portion of the local submatrix (possibly different for 3227 each block row) or NULL. 3228 3229 Output Parameter: 3230 . A - the matrix 3231 3232 Options Database Keys: 3233 + -mat_block_size - size of the blocks to use 3234 - -mat_use_hash_table <fact> - use a hash table for faster insertion of values during assembly, with <fact> as the hash table factor 3235 3236 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3237 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3238 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3239 3240 Notes: 3241 If the *_nnz parameter is given then the *_nz parameter is ignored 3242 3243 A nonzero block is any block that has 1 or more nonzeros in it 3244 3245 The user MUST specify either the local or global matrix dimensions 3246 (possibly both). 3247 3248 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3249 then it must be used on all processors that share the object for that argument. 3250 3251 Storage Information: 3252 For a square global matrix we define each processor's diagonal portion 3253 to be its local rows and the corresponding columns (a square submatrix); 3254 each processor's off-diagonal portion encompasses the remainder of the 3255 local matrix (a rectangular submatrix). 3256 3257 The user can specify preallocated storage for the diagonal part of 3258 the local submatrix with either d_nz or d_nnz (not both). Set 3259 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3260 memory allocation. Likewise, specify preallocated storage for the 3261 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3262 3263 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3264 the figure below we depict these three local rows and all columns (0-11).
3265 3266 .vb 3267 0 1 2 3 4 5 6 7 8 9 10 11 3268 -------------------------- 3269 row 3 |o o o d d d o o o o o o 3270 row 4 |o o o d d d o o o o o o 3271 row 5 |o o o d d d o o o o o o 3272 -------------------------- 3273 .ve 3274 3275 Thus, any entries in the d locations are stored in the d (diagonal) 3276 submatrix, and any entries in the o locations are stored in the 3277 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3278 stored simply in the MATSEQBAIJ format for compressed row storage. 3279 3280 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3281 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3282 In general, for PDE problems in which most nonzeros are near the diagonal, 3283 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3284 or you will get TERRIBLE performance; see the users' manual chapter on 3285 matrices. 3286 3287 Level: intermediate 3288 3289 .keywords: matrix, block, aij, compressed row, sparse, parallel 3290 3291 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3292 @*/ 3293 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3294 { 3295 PetscErrorCode ierr; 3296 PetscMPIInt size; 3297 3298 PetscFunctionBegin; 3299 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3300 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3301 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3302 if (size > 1) { 3303 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3304 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3305 } else { 3306 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3307 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3308 } 3309 PetscFunctionReturn(0); 3310 } 3311 3312 #undef __FUNCT__ 3313 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3314 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3315 { 3316 Mat mat; 3317 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3318 PetscErrorCode ierr; 3319 PetscInt len=0; 3320 3321 PetscFunctionBegin; 3322 *newmat = 0; 3323 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3324 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3325 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3326 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3327 3328 mat->factortype = matin->factortype; 3329 mat->preallocated = PETSC_TRUE; 3330 mat->assembled = PETSC_TRUE; 3331 mat->insertmode = NOT_SET_VALUES; 3332 3333 a = (Mat_MPIBAIJ*)mat->data; 3334 mat->rmap->bs = matin->rmap->bs; 3335 a->bs2 = oldmat->bs2; 3336 a->mbs = oldmat->mbs; 3337 a->nbs = oldmat->nbs; 3338 a->Mbs = oldmat->Mbs; 3339 a->Nbs = oldmat->Nbs; 3340 3341 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3342 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3343 3344 a->size = oldmat->size; 3345 a->rank = oldmat->rank; 3346 a->donotstash = oldmat->donotstash; 3347 a->roworiented = oldmat->roworiented; 3348 a->rowindices = 0; 3349 a->rowvalues = 0; 3350 a->getrowactive = PETSC_FALSE; 3351 a->barray = 0; 3352 a->rstartbs = oldmat->rstartbs; 3353 a->rendbs = oldmat->rendbs; 3354 a->cstartbs = oldmat->cstartbs; 3355 a->cendbs = 
oldmat->cendbs; 3356 3357 /* hash table stuff */ 3358 a->ht = 0; 3359 a->hd = 0; 3360 a->ht_size = 0; 3361 a->ht_flag = oldmat->ht_flag; 3362 a->ht_fact = oldmat->ht_fact; 3363 a->ht_total_ct = 0; 3364 a->ht_insert_ct = 0; 3365 3366 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3367 if (oldmat->colmap) { 3368 #if defined(PETSC_USE_CTABLE) 3369 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3370 #else 3371 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3372 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3373 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3374 #endif 3375 } else a->colmap = 0; 3376 3377 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3378 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3379 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3380 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3381 } else a->garray = 0; 3382 3383 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3384 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3385 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3386 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3387 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3388 3389 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3390 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3391 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3392 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3393 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3394 *newmat = mat; 3395 PetscFunctionReturn(0); 3396 } 3397 3398 #undef __FUNCT__ 3399 #define __FUNCT__ "MatLoad_MPIBAIJ" 3400 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3401 { 3402 PetscErrorCode ierr; 3403 int fd; 3404 PetscInt i,nz,j,rstart,rend; 3405 PetscScalar *vals,*buf; 3406 MPI_Comm comm; 3407 MPI_Status status; 3408 PetscMPIInt rank,size,maxnz; 3409 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3410 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3411 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3412 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3413 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3414 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3415 3416 PetscFunctionBegin; 3417 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3418 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3419 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3420 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3421 if (bs < 0) bs = 1; 3422 3423 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3425 if (!rank) { 3426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3427 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3428 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3429 } 3430 3431 if 
(newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3432 3433 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3434 M = header[1]; N = header[2]; 3435 3436 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3437 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3438 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3439 3440 /* If global sizes are set, check if they are consistent with that given in the file */ 3441 if (sizesset) { 3442 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3443 } 3444 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3445 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3446 3447 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3448 3449 /* 3450 This code adds extra rows to make sure the number of rows is 3451 divisible by the blocksize 3452 */ 3453 Mbs = M/bs; 3454 extra_rows = bs - M + bs*Mbs; 3455 if (extra_rows == bs) extra_rows = 0; 3456 else Mbs++; 3457 if (extra_rows && !rank) { 3458 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3459 } 3460 3461 /* determine ownership of all rows */ 3462 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3463 mbs = Mbs/size + ((Mbs % size) > rank); 3464 m = mbs*bs; 3465 } else { /* User set */ 3466 m = newmat->rmap->n; 3467 mbs = m/bs; 3468 } 3469 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3470 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3471 3472 /* process 0 needs enough room for process with most rows */ 3473 if (!rank) { 3474 mmax = rowners[1]; 3475 for (i=2; i<=size; i++) { 3476 mmax = PetscMax(mmax,rowners[i]); 3477 } 3478 mmax*=bs; 3479 } else mmax = -1; /* unused, but compiler warns anyway */ 3480 3481 rowners[0] = 0; 3482 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3483 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3484 rstart = rowners[rank]; 3485 rend = rowners[rank+1]; 3486 3487 /* distribute row lengths to all processors */ 3488 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3489 if (!rank) { 3490 mend = m; 3491 if (size == 1) mend = mend - extra_rows; 3492 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3493 for (j=mend; j<m; j++) locrowlens[j] = 1; 3494 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3495 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3496 for (j=0; j<m; j++) { 3497 procsnz[0] += locrowlens[j]; 3498 } 3499 for (i=1; i<size; i++) { 3500 mend = browners[i+1] - browners[i]; 3501 if (i == size-1) mend = mend - extra_rows; 3502 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3503 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3504 /* calculate the number of nonzeros on each processor */ 3505 for (j=0; j<browners[i+1]-browners[i]; j++) { 3506 procsnz[i] += rowlengths[j]; 3507 } 3508 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3509 } 3510 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3511 } else { 3512 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3513 } 3514 3515 if (!rank) { 3516 /* determine max buffer needed and 
allocate it */ 3517 maxnz = procsnz[0]; 3518 for (i=1; i<size; i++) { 3519 maxnz = PetscMax(maxnz,procsnz[i]); 3520 } 3521 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3522 3523 /* read in my part of the matrix column indices */ 3524 nz = procsnz[0]; 3525 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3526 mycols = ibuf; 3527 if (size == 1) nz -= extra_rows; 3528 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3529 if (size == 1) { 3530 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3531 } 3532 3533 /* read in every ones (except the last) and ship off */ 3534 for (i=1; i<size-1; i++) { 3535 nz = procsnz[i]; 3536 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3537 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3538 } 3539 /* read in the stuff for the last proc */ 3540 if (size != 1) { 3541 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3542 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3543 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3544 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3545 } 3546 ierr = PetscFree(cols);CHKERRQ(ierr); 3547 } else { 3548 /* determine buffer space needed for message */ 3549 nz = 0; 3550 for (i=0; i<m; i++) { 3551 nz += locrowlens[i]; 3552 } 3553 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3554 mycols = ibuf; 3555 /* receive message of column indices*/ 3556 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3557 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3558 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3559 } 3560 3561 /* loop over local rows, determining number of off diagonal entries */ 3562 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3563 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3564 rowcount = 0; nzcount = 0; 3565 for (i=0; i<mbs; i++) { 3566 dcount = 0; 3567 odcount = 0; 3568 for (j=0; j<bs; j++) { 3569 kmax = locrowlens[rowcount]; 3570 for (k=0; k<kmax; k++) { 3571 tmp = mycols[nzcount++]/bs; 3572 if (!mask[tmp]) { 3573 mask[tmp] = 1; 3574 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3575 else masked1[dcount++] = tmp; 3576 } 3577 } 3578 rowcount++; 3579 } 3580 3581 dlens[i] = dcount; 3582 odlens[i] = odcount; 3583 3584 /* zero out the mask elements we set */ 3585 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3586 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3587 } 3588 3589 3590 if (!sizesset) { 3591 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3592 } 3593 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3594 3595 if (!rank) { 3596 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3597 /* read in my part of the matrix numerical values */ 3598 nz = procsnz[0]; 3599 vals = buf; 3600 mycols = ibuf; 3601 if (size == 1) nz -= extra_rows; 3602 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3603 if (size == 1) { 3604 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3605 } 3606 3607 /* insert into matrix */ 3608 jj = rstart*bs; 3609 for (i=0; i<m; i++) { 3610 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3611 mycols += locrowlens[i]; 3612 vals += locrowlens[i]; 3613 jj++; 3614 } 3615 /* read in other processors (except the last one) and ship out */ 3616 for (i=1; i<size-1; i++) { 3617 nz = procsnz[i]; 3618 vals = buf; 3619 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3620 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3621 } 3622 /* the last proc */ 3623 if (size != 1) { 3624 nz = procsnz[i] - extra_rows; 3625 vals = buf; 3626 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3627 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3628 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3629 } 3630 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3631 } else { 3632 /* receive numeric values */ 3633 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3634 3635 /* receive message of values*/ 3636 vals = buf; 3637 mycols = ibuf; 3638 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3639 3640 /* insert into matrix */ 3641 jj = rstart*bs; 3642 for (i=0; i<m; i++) { 3643 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3644 mycols += locrowlens[i]; 3645 vals += locrowlens[i]; 3646 jj++; 3647 } 3648 } 3649 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3650 ierr = PetscFree(buf);CHKERRQ(ierr); 3651 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3652 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3653 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3654 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3655 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 PetscFunctionReturn(0); 3658 } 3659 3660 #undef __FUNCT__ 3661 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3662 /*@ 3663 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3664 3665 Input Parameters: 3666 . mat - the matrix 3667 . 
fact - the hash table factor 3668 3669 Not Collective, each process can use a different factor 3670 3671 Level: advanced 3672 3673 Notes: 3674 This can also be set by the command line option: -mat_use_hash_table <fact> 3675 3676 .keywords: matrix, hashtable, factor, HT 3677 3678 .seealso: MatSetOption() 3679 @*/ 3680 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3681 { 3682 PetscErrorCode ierr; 3683 3684 PetscFunctionBegin; 3685 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3686 PetscFunctionReturn(0); 3687 } 3688 3689 #undef __FUNCT__ 3690 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3691 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3692 { 3693 Mat_MPIBAIJ *baij; 3694 3695 PetscFunctionBegin; 3696 baij = (Mat_MPIBAIJ*)mat->data; 3697 baij->ht_fact = fact; 3698 PetscFunctionReturn(0); 3699 } 3700 3701 #undef __FUNCT__ 3702 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3703 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3704 { 3705 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3706 3707 PetscFunctionBegin; 3708 if (Ad) *Ad = a->A; 3709 if (Ao) *Ao = a->B; 3710 if (colmap) *colmap = a->garray; 3711 PetscFunctionReturn(0); 3712 } 3713 3714 /* 3715 Special version for direct calls from Fortran (to eliminate two function call overheads) 3716 */ 3717 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3718 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3719 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3720 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3721 #endif 3722 3723 #undef __FUNCT__ 3724 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3725 /*@C 3726 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3727 3728 Collective on Mat 3729 3730 Input Parameters: 3731 + mat - the matrix 3732 . min - number of input rows 3733 . im - input rows 3734 . nin - number of input columns 3735 . in - input columns 3736 . v - numerical values input 3737 - addvin - INSERT_VALUES or ADD_VALUES 3738 3739 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
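   From C one would simply call the standard interface; a minimal sketch (mat, row, col and the bs*bs array of values v are assumed to exist already):
.vb
     ierr = MatSetValuesBlocked(mat,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
.ve
   This entry point only exists so that Fortran callers can skip the Fortran-to-C wrapper and the MatSetValuesBlocked() dispatch, as noted in the comment above.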
3740 3741 Level: advanced 3742 3743 .seealso: MatSetValuesBlocked() 3744 @*/ 3745 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3746 { 3747 /* convert input arguments to C version */ 3748 Mat mat = *matin; 3749 PetscInt m = *min, n = *nin; 3750 InsertMode addv = *addvin; 3751 3752 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3753 const MatScalar *value; 3754 MatScalar *barray = baij->barray; 3755 PetscBool roworiented = baij->roworiented; 3756 PetscErrorCode ierr; 3757 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3758 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3759 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3760 3761 PetscFunctionBegin; 3762 /* tasks normally handled by MatSetValuesBlocked() */ 3763 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3764 #if defined(PETSC_USE_DEBUG) 3765 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3766 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3767 #endif 3768 if (mat->assembled) { 3769 mat->was_assembled = PETSC_TRUE; 3770 mat->assembled = PETSC_FALSE; 3771 } 3772 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3773 3774 3775 if (!barray) { 3776 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3777 baij->barray = barray; 3778 } 3779 3780 if (roworiented) stepval = (n-1)*bs; 3781 else stepval = (m-1)*bs; 3782 3783 for (i=0; i<m; i++) { 3784 if (im[i] < 0) continue; 3785 #if defined(PETSC_USE_DEBUG) 3786 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3787 #endif 3788 if (im[i] >= rstart && im[i] < rend) { 3789 row = im[i] - rstart; 3790 for (j=0; j<n; j++) { 3791 /* If NumCol = 1 then a copy is not required */ 3792 if ((roworiented) && (n == 1)) { 3793 barray = (MatScalar*)v + i*bs2; 3794 } else if ((!roworiented) && (m == 1)) { 3795 barray = (MatScalar*)v + j*bs2; 3796 } else { /* Here a copy is required */ 3797 if (roworiented) { 3798 value = v + i*(stepval+bs)*bs + j*bs; 3799 } else { 3800 value = v + j*(stepval+bs)*bs + i*bs; 3801 } 3802 for (ii=0; ii<bs; ii++,value+=stepval) { 3803 for (jj=0; jj<bs; jj++) { 3804 *barray++ = *value++; 3805 } 3806 } 3807 barray -=bs2; 3808 } 3809 3810 if (in[j] >= cstart && in[j] < cend) { 3811 col = in[j] - cstart; 3812 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3813 } else if (in[j] < 0) continue; 3814 #if defined(PETSC_USE_DEBUG) 3815 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3816 #endif 3817 else { 3818 if (mat->was_assembled) { 3819 if (!baij->colmap) { 3820 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3821 } 3822 3823 #if defined(PETSC_USE_DEBUG) 3824 #if defined(PETSC_USE_CTABLE) 3825 { PetscInt data; 3826 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3827 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3828 } 3829 #else 3830 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3831 #endif 3832 #endif 3833 #if defined(PETSC_USE_CTABLE) 3834 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3835 col = (col - 1)/bs; 3836 #else 3837 col = (baij->colmap[in[j]] - 1)/bs; 3838 
#endif 3839 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3840 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3841 col = in[j]; 3842 } 3843 } else col = in[j]; 3844 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3845 } 3846 } 3847 } else { 3848 if (!baij->donotstash) { 3849 if (roworiented) { 3850 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3851 } else { 3852 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3853 } 3854 } 3855 } 3856 } 3857 3858 /* task normally handled by MatSetValuesBlocked() */ 3859 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3860 PetscFunctionReturn(0); 3861 } 3862 3863 #undef __FUNCT__ 3864 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3865 /*@ 3866 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3867 CSR format the local rows. 3868 3869 Collective on MPI_Comm 3870 3871 Input Parameters: 3872 + comm - MPI communicator 3873 . bs - the block size, only a block size of 1 is supported 3874 . m - number of local rows (Cannot be PETSC_DECIDE) 3875 . n - This value should be the same as the local size used in creating the 3876 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3877 calculated if N is given) For square matrices n is almost always m. 3878 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3879 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3880 . i - row indices 3881 . j - column indices 3882 - a - matrix values 3883 3884 Output Parameter: 3885 . mat - the matrix 3886 3887 Level: intermediate 3888 3889 Notes: 3890 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3891 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3892 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3893 3894 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3895 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3896 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3897 with column-major ordering within blocks. 3898 3899 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
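   A small sketch for bs = 1, e.g. on a communicator with a single process owning two rows and three columns (the indices and values are made up purely for illustration):
.vb
     PetscInt    ia[] = {0,2,4};
     PetscInt    ja[] = {0,1,1,2};
     PetscScalar va[] = {1.0,2.0,3.0,4.0};
     ierr = MatCreateMPIBAIJWithArrays(comm,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&mat);CHKERRQ(ierr);
.ve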
3900 3901 .keywords: matrix, aij, compressed row, sparse, parallel 3902 3903 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3904 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3905 @*/ 3906 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3907 { 3908 PetscErrorCode ierr; 3909 3910 PetscFunctionBegin; 3911 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3912 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3913 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3914 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3915 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3916 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3917 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3918 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3919 PetscFunctionReturn(0); 3920 } 3921 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 3924 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3925 { 3926 PetscErrorCode ierr; 3927 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3928 PetscInt *indx; 3929 PetscScalar *values; 3930 3931 PetscFunctionBegin; 3932 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3933 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3935 PetscInt *dnz,*onz,sum,mbs,Nbs; 3936 PetscInt *bindx,rmax=a->rmax,j; 3937 3938 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3939 mbs = m/bs; Nbs = N/cbs; 3940 if (n == PETSC_DECIDE) { 3941 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3942 } 3943 /* Check sum(n) = Nbs */ 3944 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3945 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3946 3947 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3948 rstart -= mbs; 3949 3950 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3951 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3952 for (i=0; i<mbs; i++) { 3953 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3954 nnz = nnz/bs; 3955 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3956 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3957 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3958 } 3959 ierr = PetscFree(bindx);CHKERRQ(ierr); 3960 3961 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3962 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3963 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3964 ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr); 3965 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3966 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3967 } 3968 3969 /* numeric phase */ 3970 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3971 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3972 3973 for (i=0; i<m; i++) { 3974 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3975 Ii = i + rstart; 3976 ierr =
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3977 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3978 } 3979 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3980 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3981 PetscFunctionReturn(0); 3982 } 3983
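/*
   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ() above is the MATMPIBAIJ implementation reached through
   the generic MatCreateMPIMatConcatenateSeqMat() interface. A minimal usage sketch, assuming every
   process holds an assembled sequential BAIJ matrix seqA whose rows are to be stacked into one
   parallel matrix (seqA and n are illustrative names; as the code above shows, n is counted in
   block columns and may be PETSC_DECIDE):

      Mat A;
      ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,n,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/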