1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 244 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 252 else col += in[j]%bs; 253 } else col = in[j]; 254 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 255 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 256 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 257 } 258 } 259 } else { 260 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 261 if (!baij->donotstash) { 262 mat->assembled = PETSC_FALSE; 263 if (roworiented) { 264 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 265 } else { 266 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 267 } 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscBool roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if (!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) { 319 barray[jj] = value[jj]; 320 } 321 barray += bs; 322 } 323 barray -= bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 357 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off 
process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 386 { 387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 388 PetscBool roworiented = baij->roworiented; 389 PetscErrorCode ierr; 390 PetscInt i,j,row,col; 391 PetscInt rstart_orig=mat->rmap->rstart; 392 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 393 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 394 PetscReal tmp; 395 MatScalar **HD = baij->hd,value; 396 #if defined(PETSC_USE_DEBUG) 397 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 398 #endif 399 400 PetscFunctionBegin; 401 if (v) PetscValidScalarPointer(v,6); 402 for (i=0; i<m; i++) { 403 #if defined(PETSC_USE_DEBUG) 404 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 405 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 406 #endif 407 row = im[i]; 408 if (row >= rstart_orig && row < rend_orig) { 409 for (j=0; j<n; j++) { 410 col = in[j]; 411 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 412 /* Look up PetscInto the Hash Table */ 413 key = (row/bs)*Nbs+(col/bs)+1; 414 h1 = HASH(size,key,tmp); 415 416 417 idx = h1; 418 #if defined(PETSC_USE_DEBUG) 419 insert_ct++; 420 total_ct++; 421 if (HT[idx] != key) { 422 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == size) { 424 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 425 if (idx == h1) { 426 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 427 } 428 } 429 } 430 #else 431 if (HT[idx] != key) { 432 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 433 if (idx == size) { 434 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 435 if (idx == h1) { 436 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 437 } 438 } 439 } 440 #endif 441 /* A HASH table entry is found, so insert the values at the correct address */ 442 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 443 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 444 } 445 } else { 446 if (!baij->donotstash) { 447 if (roworiented) { 448 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 449 } else { 450 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 451 } 452 } 453 } 454 } 455 #if defined(PETSC_USE_DEBUG) 456 baij->ht_total_ct = total_ct; 457 baij->ht_insert_ct = insert_ct; 458 #endif 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 464 PetscErrorCode 
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 465 { 466 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 467 PetscBool roworiented = baij->roworiented; 468 PetscErrorCode ierr; 469 PetscInt i,j,ii,jj,row,col; 470 PetscInt rstart=baij->rstartbs; 471 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 472 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 473 PetscReal tmp; 474 MatScalar **HD = baij->hd,*baij_a; 475 const PetscScalar *v_t,*value; 476 #if defined(PETSC_USE_DEBUG) 477 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 478 #endif 479 480 PetscFunctionBegin; 481 482 if (roworiented) { 483 stepval = (n-1)*bs; 484 } else { 485 stepval = (m-1)*bs; 486 } 487 for (i=0; i<m; i++) { 488 #if defined(PETSC_USE_DEBUG) 489 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 490 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 491 #endif 492 row = im[i]; 493 v_t = v + i*nbs2; 494 if (row >= rstart && row < rend) { 495 for (j=0; j<n; j++) { 496 col = in[j]; 497 498 /* Look up into the Hash Table */ 499 key = row*Nbs+col+1; 500 h1 = HASH(size,key,tmp); 501 502 idx = h1; 503 #if defined(PETSC_USE_DEBUG) 504 total_ct++; 505 insert_ct++; 506 if (HT[idx] != key) { 507 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == size) { 509 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 510 if (idx == h1) { 511 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 } 515 #else 516 if (HT[idx] != key) { 517 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 518 if (idx == size) { 519 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 520 if (idx == h1) { 521 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 522 } 523 } 524 } 525 #endif 526 baij_a = HD[idx]; 527 if (roworiented) { 528 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 529 /* value = v + (i*(stepval+bs)+j)*bs; */ 530 value = v_t; 531 v_t += bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval) { 534 for (jj=ii; jj<bs2; jj+=bs) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval) { 540 for (jj=ii; jj<bs2; jj+=bs) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } else { 546 value = v + j*(stepval+bs)*bs + i*bs; 547 if (addv == ADD_VALUES) { 548 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 549 for (jj=0; jj<bs; jj++) { 550 baij_a[jj] += *value++; 551 } 552 } 553 } else { 554 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 555 for (jj=0; jj<bs; jj++) { 556 baij_a[jj] = *value++; 557 } 558 } 559 } 560 } 561 } 562 } else { 563 if (!baij->donotstash) { 564 if (roworiented) { 565 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } else { 567 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 #if defined(PETSC_USE_DEBUG) 573 baij->ht_total_ct = total_ct; 574 baij->ht_insert_ct = insert_ct; 575 #endif 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatGetValues_MPIBAIJ" 581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 582 { 583 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 584 PetscErrorCode ierr; 585 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 586 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 587 588 PetscFunctionBegin; 589 for (i=0; i<m; i++) { 590 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 591 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 592 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 593 row = idxm[i] - bsrstart; 594 for (j=0; j<n; j++) { 595 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 596 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 597 if (idxn[j] >= bscstart && idxn[j] < bscend){ 598 col = idxn[j] - bscstart; 599 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 600 } else { 601 if (!baij->colmap) { 602 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 603 } 604 #if defined (PETSC_USE_CTABLE) 605 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 606 data --; 607 #else 608 data = baij->colmap[idxn[j]/bs]-1; 609 #endif 610 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 611 else { 612 col = data + idxn[j]%bs; 613 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 614 } 615 } 616 } 617 } else { 618 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatNorm_MPIBAIJ" 626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 627 { 628 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 629 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 630 PetscErrorCode ierr; 631 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 632 PetscReal sum = 0.0; 633 MatScalar *v; 634 635 PetscFunctionBegin; 636 if (baij->size == 1) { 637 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 638 } else { 639 if (type == NORM_FROBENIUS) { 640 v = amat->a; 641 nz = amat->nz*bs2; 642 for (i=0; i<nz; i++) { 643 #if defined(PETSC_USE_COMPLEX) 644 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 645 #else 646 sum += (*v)*(*v); v++; 647 #endif 648 } 649 v = bmat->a; 650 nz = bmat->nz*bs2; 651 for (i=0; i<nz; i++) { 652 #if defined(PETSC_USE_COMPLEX) 653 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 654 #else 655 sum += (*v)*(*v); v++; 656 #endif 657 } 658 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 *nrm = PetscSqrtReal(*nrm); 660 } else if (type == NORM_1) { /* max column sum */ 661 PetscReal *tmp,*tmp2; 662 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 663 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 664 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 665 v = amat->a; jj = amat->j; 666 for (i=0; i<amat->nz; i++) { 667 for (j=0; j<bs; j++){ 668 col = bs*(cstart + *jj) + j; /* column index */ 669 for (row=0; row<bs; row++){ 670 tmp[col] += PetscAbsScalar(*v); v++; 671 } 672 } 673 jj++; 674 } 675 v = bmat->a; jj = bmat->j; 676 for (i=0; i<bmat->nz; i++) { 677 for (j=0; j<bs; j++){ 678 col = bs*garray[*jj] + j; 679 for (row=0; row<bs; row++){ 680 tmp[col] += PetscAbsScalar(*v); 
v++; 681 } 682 } 683 jj++; 684 } 685 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 686 *nrm = 0.0; 687 for (j=0; j<mat->cmap->N; j++) { 688 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 689 } 690 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 691 } else if (type == NORM_INFINITY) { /* max row sum */ 692 PetscReal *sums; 693 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 694 sum = 0.0; 695 for (j=0; j<amat->mbs; j++) { 696 for (row=0; row<bs; row++) sums[row] = 0.0; 697 v = amat->a + bs2*amat->i[j]; 698 nz = amat->i[j+1]-amat->i[j]; 699 for (i=0; i<nz; i++) { 700 for (col=0; col<bs; col++){ 701 for (row=0; row<bs; row++){ 702 sums[row] += PetscAbsScalar(*v); v++; 703 } 704 } 705 } 706 v = bmat->a + bs2*bmat->i[j]; 707 nz = bmat->i[j+1]-bmat->i[j]; 708 for (i=0; i<nz; i++) { 709 for (col=0; col<bs; col++){ 710 for (row=0; row<bs; row++){ 711 sums[row] += PetscAbsScalar(*v); v++; 712 } 713 } 714 } 715 for (row=0; row<bs; row++){ 716 if (sums[row] > sum) sum = sums[row]; 717 } 718 } 719 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 720 ierr = PetscFree(sums);CHKERRQ(ierr); 721 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 722 } 723 PetscFunctionReturn(0); 724 } 725 726 /* 727 Creates the hash table, and sets the table 728 This table is created only once. 729 If new entried need to be added to the matrix 730 then the hash table has to be destroyed and 731 recreated. 732 */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 736 { 737 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 738 Mat A = baij->A,B=baij->B; 739 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 740 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 741 PetscErrorCode ierr; 742 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 743 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 744 PetscInt *HT,key; 745 MatScalar **HD; 746 PetscReal tmp; 747 #if defined(PETSC_USE_INFO) 748 PetscInt ct=0,max=0; 749 #endif 750 751 PetscFunctionBegin; 752 if (baij->ht) PetscFunctionReturn(0); 753 754 baij->ht_size = (PetscInt)(factor*nz); 755 ht_size = baij->ht_size; 756 757 /* Allocate Memory for Hash Table */ 758 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 759 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 760 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 761 HD = baij->hd; 762 HT = baij->ht; 763 764 /* Loop Over A */ 765 for (i=0; i<a->mbs; i++) { 766 for (j=ai[i]; j<ai[i+1]; j++) { 767 row = i+rstart; 768 col = aj[j]+cstart; 769 770 key = row*Nbs + col + 1; 771 h1 = HASH(ht_size,key,tmp); 772 for (k=0; k<ht_size; k++){ 773 if (!HT[(h1+k)%ht_size]) { 774 HT[(h1+k)%ht_size] = key; 775 HD[(h1+k)%ht_size] = a->a + j*bs2; 776 break; 777 #if defined(PETSC_USE_INFO) 778 } else { 779 ct++; 780 #endif 781 } 782 } 783 #if defined(PETSC_USE_INFO) 784 if (k> max) max = k; 785 #endif 786 } 787 } 788 /* Loop Over B */ 789 for (i=0; i<b->mbs; i++) { 790 for (j=bi[i]; j<bi[i+1]; j++) { 791 row = i+rstart; 792 col = garray[bj[j]]; 793 key = row*Nbs + col + 1; 794 h1 = HASH(ht_size,key,tmp); 795 for (k=0; k<ht_size; k++){ 796 if (!HT[(h1+k)%ht_size]) { 797 HT[(h1+k)%ht_size] = key; 798 HD[(h1+k)%ht_size] = b->a + j*bs2; 799 break; 800 #if 
defined(PETSC_USE_INFO) 801 } else { 802 ct++; 803 #endif 804 } 805 } 806 #if defined(PETSC_USE_INFO) 807 if (k> max) max = k; 808 #endif 809 } 810 } 811 812 /* Print Summary */ 813 #if defined(PETSC_USE_INFO) 814 for (i=0,j=0; i<ht_size; i++) { 815 if (HT[i]) {j++;} 816 } 817 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 818 #endif 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 825 { 826 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 827 PetscErrorCode ierr; 828 PetscInt nstash,reallocs; 829 InsertMode addv; 830 831 PetscFunctionBegin; 832 if (baij->donotstash || mat->nooffprocentries) { 833 PetscFunctionReturn(0); 834 } 835 836 /* make sure all processors are either in INSERTMODE or ADDMODE */ 837 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 838 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 839 mat->insertmode = addv; /* in case this processor had no cache */ 840 841 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 842 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 846 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 853 { 854 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 855 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 856 PetscErrorCode ierr; 857 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 858 PetscInt *row,*col; 859 PetscBool r1,r2,r3,other_disassembled; 860 MatScalar *val; 861 InsertMode addv = mat->insertmode; 862 PetscMPIInt n; 863 864 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 865 PetscFunctionBegin; 866 if (!baij->donotstash && !mat->nooffprocentries) { 867 while (1) { 868 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 869 if (!flg) break; 870 871 for (i=0; i<n;) { 872 /* Now identify the consecutive vals belonging to the same row */ 873 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 874 if (j < n) ncols = j-i; 875 else ncols = n-i; 876 /* Now assemble all these values with a single function call */ 877 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 882 /* Now process the block-stash. 
Since the values are stashed column-oriented, 883 set the roworiented flag to column oriented, and after MatSetValues() 884 restore the original flags */ 885 r1 = baij->roworiented; 886 r2 = a->roworiented; 887 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 888 baij->roworiented = PETSC_FALSE; 889 a->roworiented = PETSC_FALSE; 890 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 891 while (1) { 892 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 893 if (!flg) break; 894 895 for (i=0; i<n;) { 896 /* Now identify the consecutive vals belonging to the same row */ 897 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 898 if (j < n) ncols = j-i; 899 else ncols = n-i; 900 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 901 i = j; 902 } 903 } 904 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 905 baij->roworiented = r1; 906 a->roworiented = r2; 907 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 908 } 909 910 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 911 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 912 913 /* determine if any processor has disassembled, if so we must 914 also disassemble ourselfs, in order that we may reassemble. */ 915 /* 916 if nonzero structure of submatrix B cannot change then we know that 917 no processor disassembled thus we can skip this stuff 918 */ 919 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 920 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 921 if (mat->was_assembled && !other_disassembled) { 922 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 923 } 924 } 925 926 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 927 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 928 } 929 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 930 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 931 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 932 933 #if defined(PETSC_USE_INFO) 934 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 935 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 936 baij->ht_total_ct = 0; 937 baij->ht_insert_ct = 0; 938 } 939 #endif 940 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 941 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 942 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 943 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 944 } 945 946 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 947 baij->rowvalues = 0; 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 954 { 955 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 956 PetscErrorCode ierr; 957 PetscMPIInt size = baij->size,rank = baij->rank; 958 PetscInt bs = mat->rmap->bs; 959 PetscBool iascii,isdraw; 960 PetscViewer sviewer; 961 PetscViewerFormat format; 962 963 PetscFunctionBegin; 964 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 965 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 966 if (iascii) { 967 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 968 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 969 MatInfo info; 970 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 971 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 972 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 973 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 974 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 979 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 980 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 981 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 982 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } else if (format == PETSC_VIEWER_ASCII_INFO) { 985 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 988 PetscFunctionReturn(0); 989 } 990 } 991 992 if (isdraw) { 993 PetscDraw draw; 994 PetscBool isnull; 995 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 996 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 997 } 998 999 if (size == 1) { 1000 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1001 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1002 } else { 1003 /* assemble the entire matrix onto first processor. */ 1004 Mat A; 1005 Mat_SeqBAIJ *Aloc; 1006 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1007 MatScalar *a; 1008 1009 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1010 /* Perhaps this should be the type of mat? 
*/ 1011 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1012 if (!rank) { 1013 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1014 } else { 1015 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1016 } 1017 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1018 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1019 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1020 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1021 1022 /* copy over the A part */ 1023 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1024 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1025 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1026 1027 for (i=0; i<mbs; i++) { 1028 rvals[0] = bs*(baij->rstartbs + i); 1029 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1030 for (j=ai[i]; j<ai[i+1]; j++) { 1031 col = (baij->cstartbs+aj[j])*bs; 1032 for (k=0; k<bs; k++) { 1033 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1034 col++; a += bs; 1035 } 1036 } 1037 } 1038 /* copy over the B part */ 1039 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1040 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1041 for (i=0; i<mbs; i++) { 1042 rvals[0] = bs*(baij->rstartbs + i); 1043 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1044 for (j=ai[i]; j<ai[i+1]; j++) { 1045 col = baij->garray[aj[j]]*bs; 1046 for (k=0; k<bs; k++) { 1047 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1048 col++; a += bs; 1049 } 1050 } 1051 } 1052 ierr = PetscFree(rvals);CHKERRQ(ierr); 1053 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1054 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1055 /* 1056 Everyone has to call to draw the matrix since the graphics waits are 1057 synchronized across all processors that share the PetscDraw object 1058 */ 1059 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1060 if (!rank) { 1061 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1062 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1063 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1064 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1065 } 1066 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1067 ierr = MatDestroy(&A);CHKERRQ(ierr); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1074 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1075 { 1076 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1077 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1078 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1079 PetscErrorCode ierr; 1080 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1081 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1082 int fd; 1083 PetscScalar *column_values; 1084 FILE *file; 1085 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1086 PetscInt message_count,flowcontrolcount; 1087 1088 PetscFunctionBegin; 1089 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1090 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1091 nz = bs2*(A->nz + B->nz); 1092 rlen = mat->rmap->n; 1093 if (!rank) { 1094 header[0] = MAT_FILE_CLASSID; 1095 header[1] = mat->rmap->N; 
1096 header[2] = mat->cmap->N; 1097 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1098 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1099 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1100 /* get largest number of rows any processor has */ 1101 range = mat->rmap->range; 1102 for (i=1; i<size; i++) { 1103 rlen = PetscMax(rlen,range[i+1] - range[i]); 1104 } 1105 } else { 1106 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1107 } 1108 1109 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1110 /* compute lengths of each row */ 1111 for (i=0; i<a->mbs; i++) { 1112 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1113 } 1114 /* store the row lengths to the file */ 1115 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1116 if (!rank) { 1117 MPI_Status status; 1118 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1119 rlen = (range[1] - range[0])/bs; 1120 for (i=0; i<rlen; i++) { 1121 for (j=0; j<bs; j++) { 1122 row_lens[i*bs+j] = bs*crow_lens[i]; 1123 } 1124 } 1125 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1126 for (i=1; i<size; i++) { 1127 rlen = (range[i+1] - range[i])/bs; 1128 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1129 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1130 for (k=0; k<rlen; k++) { 1131 for (j=0; j<bs; j++) { 1132 row_lens[k*bs+j] = bs*crow_lens[k]; 1133 } 1134 } 1135 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1136 } 1137 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1138 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1139 } else { 1140 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1141 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1142 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1143 } 1144 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1145 1146 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1147 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1148 the communication needed for the nonzero values */ 1149 nzmax = nz; /* space a largest processor needs */ 1150 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1151 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1152 cnt = 0; 1153 for (i=0; i<a->mbs; i++) { 1154 pcnt = cnt; 1155 for (j=B->i[i]; j<B->i[i+1]; j++) { 1156 if ( (col = garray[B->j[j]]) > cstart) break; 1157 for (l=0; l<bs; l++) { 1158 column_indices[cnt++] = bs*col+l; 1159 } 1160 } 1161 for (k=A->i[i]; k<A->i[i+1]; k++) { 1162 for (l=0; l<bs; l++) { 1163 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1164 } 1165 } 1166 for (; j<B->i[i+1]; j++) { 1167 for (l=0; l<bs; l++) { 1168 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1169 } 1170 } 1171 len = cnt - pcnt; 1172 for (k=1; k<bs; k++) { 1173 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1174 cnt += len; 1175 } 1176 } 1177 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1178 1179 /* store the columns to the file */ 1180 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1181 if (!rank) { 1182 MPI_Status status; 1183 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1184 for (i=1; i<size; i++) { 1185 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1186 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1187 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1188 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1191 } else { 1192 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1193 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1194 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1195 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1196 } 1197 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1198 1199 /* load up the numerical values */ 1200 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1201 cnt = 0; 1202 for (i=0; i<a->mbs; i++) { 1203 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1204 for (j=B->i[i]; j<B->i[i+1]; j++) { 1205 if ( garray[B->j[j]] > cstart) break; 1206 for (l=0; l<bs; l++) { 1207 for (ll=0; ll<bs; ll++) { 1208 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1209 } 1210 } 1211 cnt += bs; 1212 } 1213 for (k=A->i[i]; k<A->i[i+1]; k++) { 1214 for (l=0; l<bs; l++) { 1215 for (ll=0; ll<bs; ll++) { 1216 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1217 } 1218 } 1219 cnt += bs; 1220 } 1221 for (; j<B->i[i+1]; j++) { 1222 for (l=0; l<bs; l++) { 1223 for (ll=0; ll<bs; ll++) { 1224 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1225 } 1226 } 1227 cnt += bs; 1228 } 1229 cnt += (bs-1)*rlen; 1230 } 1231 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1232 1233 /* store the column values to the file */ 1234 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1235 if (!rank) { 1236 MPI_Status status; 1237 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1238 for (i=1; i<size; i++) { 1239 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1240 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1241 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1242 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1243 } 1244 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1245 } else { 1246 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1247 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1248 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1249 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1250 } 1251 ierr = PetscFree(column_values);CHKERRQ(ierr); 1252 1253 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1254 if (file) { 1255 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatView_MPIBAIJ" 1262 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1263 { 1264 PetscErrorCode ierr; 1265 PetscBool iascii,isdraw,issocket,isbinary; 1266 1267 PetscFunctionBegin; 1268 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1269 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1270 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1271 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1272 if (iascii || isdraw || issocket) { 1273 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1274 } else if (isbinary) { 1275 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1276 } else { 1277 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1284 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1285 { 1286 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 #if defined(PETSC_USE_LOG) 1291 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1292 #endif 1293 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1294 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1295 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1296 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1297 #if defined (PETSC_USE_CTABLE) 1298 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1299 #else 1300 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1301 #endif 1302 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1303 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1304 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1305 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1306 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1307 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1308 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1309 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1310 1311 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1312 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1320 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMult_MPIBAIJ" 1326 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 PetscInt nt; 1331 1332 PetscFunctionBegin; 1333 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1334 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1335 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1336 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1337 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1339 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1340 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1346 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1347 { 1348 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1353 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1354 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1355 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1361 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1362 { 1363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1364 PetscErrorCode ierr; 1365 PetscBool merged; 1366 1367 PetscFunctionBegin; 1368 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1369 /* do nondiagonal part */ 1370 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1371 if (!merged) { 1372 /* send it on its way */ 1373 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1374 /* do local part */ 1375 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1376 /* receive remote parts: note this assumes the values are not actually */ 1377 /* inserted in yy until the next line */ 1378 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1379 } else { 1380 /* do local part */ 1381 ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1382 /* send it on its way */ 1383 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1384 /* values actually were received in the Begin() but we need to call this nop */ 1385 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1392 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1393 { 1394 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1395 PetscErrorCode ierr; 1396 1397 PetscFunctionBegin; 1398 /* do nondiagonal part */ 1399 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1400 /* send it on its way */ 1401 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1402 /* do local part */ 1403 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1404 /* receive remote parts: note this assumes the values are not actually */ 1405 /* inserted in yy until the next line, which is true for my implementation*/ 1406 /* but is not perhaps always true. */ 1407 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* 1412 This only works correctly for square matrices where the subblock A->A is the 1413 diagonal block 1414 */ 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1417 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1418 { 1419 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1424 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatScale_MPIBAIJ" 1430 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1431 { 1432 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1437 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1443 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1444 { 1445 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1446 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1447 PetscErrorCode ierr; 1448 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1449 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1450 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1451 1452 PetscFunctionBegin; 1453 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1454 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1455 mat->getrowactive = PETSC_TRUE; 1456 1457 if (!mat->rowvalues && (idx || v)) { 1458 /* 1459 allocate enough space to hold information from the longest row. 
1460 */ 1461 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1462 PetscInt max = 1,mbs = mat->mbs,tmp; 1463 for (i=0; i<mbs; i++) { 1464 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1465 if (max < tmp) { max = tmp; } 1466 } 1467 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1468 } 1469 lrow = row - brstart; 1470 1471 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1472 if (!v) {pvA = 0; pvB = 0;} 1473 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1474 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1475 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1476 nztot = nzA + nzB; 1477 1478 cmap = mat->garray; 1479 if (v || idx) { 1480 if (nztot) { 1481 /* Sort by increasing column numbers, assuming A and B already sorted */ 1482 PetscInt imark = -1; 1483 if (v) { 1484 *v = v_p = mat->rowvalues; 1485 for (i=0; i<nzB; i++) { 1486 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1487 else break; 1488 } 1489 imark = i; 1490 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1491 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1492 } 1493 if (idx) { 1494 *idx = idx_p = mat->rowindices; 1495 if (imark > -1) { 1496 for (i=0; i<imark; i++) { 1497 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1498 } 1499 } else { 1500 for (i=0; i<nzB; i++) { 1501 if (cmap[cworkB[i]/bs] < cstart) 1502 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1503 else break; 1504 } 1505 imark = i; 1506 } 1507 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1508 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1509 } 1510 } else { 1511 if (idx) *idx = 0; 1512 if (v) *v = 0; 1513 } 1514 } 1515 *nz = nztot; 1516 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1523 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1524 { 1525 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1526 1527 PetscFunctionBegin; 1528 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1529 baij->getrowactive = PETSC_FALSE; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1535 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1536 { 1537 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1542 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1548 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1549 { 1550 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1551 Mat A = a->A,B = a->B; 1552 PetscErrorCode ierr; 1553 PetscReal isend[5],irecv[5]; 1554 1555 PetscFunctionBegin; 1556 info->block_size = (PetscReal)matin->rmap->bs; 1557 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1559 isend[3] = info->memory; isend[4] = info->mallocs; 1560 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1561 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1562 isend[3] += 
info->memory; isend[4] += info->mallocs; 1563 if (flag == MAT_LOCAL) { 1564 info->nz_used = isend[0]; 1565 info->nz_allocated = isend[1]; 1566 info->nz_unneeded = isend[2]; 1567 info->memory = isend[3]; 1568 info->mallocs = isend[4]; 1569 } else if (flag == MAT_GLOBAL_MAX) { 1570 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else if (flag == MAT_GLOBAL_SUM) { 1577 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1578 info->nz_used = irecv[0]; 1579 info->nz_allocated = irecv[1]; 1580 info->nz_unneeded = irecv[2]; 1581 info->memory = irecv[3]; 1582 info->mallocs = irecv[4]; 1583 } else { 1584 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1585 } 1586 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1587 info->fill_ratio_needed = 0; 1588 info->factor_mallocs = 0; 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1594 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1595 { 1596 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1597 PetscErrorCode ierr; 1598 1599 PetscFunctionBegin; 1600 switch (op) { 1601 case MAT_NEW_NONZERO_LOCATIONS: 1602 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1603 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1604 case MAT_KEEP_NONZERO_PATTERN: 1605 case MAT_NEW_NONZERO_LOCATION_ERR: 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1608 break; 1609 case MAT_ROW_ORIENTED: 1610 a->roworiented = flg; 1611 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1612 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1613 break; 1614 case MAT_NEW_DIAGONALS: 1615 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1616 break; 1617 case MAT_IGNORE_OFF_PROC_ENTRIES: 1618 a->donotstash = flg; 1619 break; 1620 case MAT_USE_HASH_TABLE: 1621 a->ht_flag = flg; 1622 break; 1623 case MAT_SYMMETRIC: 1624 case MAT_STRUCTURALLY_SYMMETRIC: 1625 case MAT_HERMITIAN: 1626 case MAT_SYMMETRY_ETERNAL: 1627 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1628 break; 1629 default: 1630 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1637 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1638 { 1639 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1640 Mat_SeqBAIJ *Aloc; 1641 Mat B; 1642 PetscErrorCode ierr; 1643 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1644 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1645 MatScalar *a; 1646 1647 PetscFunctionBegin; 1648 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1649 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1650 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1651 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1652 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1653 /* Do not know preallocation information, but must set block size */ 1654 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1655 } else { 1656 B = *matout; 1657 } 1658 1659 
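  /*
     The two loops below insert the transpose block by block: for block row i, rvals[] holds the bs
     global point row indices of that block row, and these become column indices of the result.
     Since each bs x bs block is stored column by column, every MatSetValues_MPIBAIJ() call deposits
     one column of the original block as one row of the transposed matrix.  The same scheme is used
     for the off-diagonal part (baij->B), with garray[] supplying the global block column.
  */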
/* copy over the A part */ 1660 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1661 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1662 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1663 1664 for (i=0; i<mbs; i++) { 1665 rvals[0] = bs*(baij->rstartbs + i); 1666 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1667 for (j=ai[i]; j<ai[i+1]; j++) { 1668 col = (baij->cstartbs+aj[j])*bs; 1669 for (k=0; k<bs; k++) { 1670 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1671 col++; a += bs; 1672 } 1673 } 1674 } 1675 /* copy over the B part */ 1676 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1677 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1678 for (i=0; i<mbs; i++) { 1679 rvals[0] = bs*(baij->rstartbs + i); 1680 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1681 for (j=ai[i]; j<ai[i+1]; j++) { 1682 col = baij->garray[aj[j]]*bs; 1683 for (k=0; k<bs; k++) { 1684 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1685 col++; a += bs; 1686 } 1687 } 1688 } 1689 ierr = PetscFree(rvals);CHKERRQ(ierr); 1690 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1691 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 1693 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1694 *matout = B; 1695 } else { 1696 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1703 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1704 { 1705 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1706 Mat a = baij->A,b = baij->B; 1707 PetscErrorCode ierr; 1708 PetscInt s1,s2,s3; 1709 1710 PetscFunctionBegin; 1711 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1712 if (rr) { 1713 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1714 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1715 /* Overlap communication with computation. 
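       The forward scatter of rr into baij->lvec is only started here; it is completed after the
       diagonal block has been scaled, and the gathered values are then used to right-scale the
       off-diagonal block.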
*/ 1716 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1717 } 1718 if (ll) { 1719 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1720 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1721 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1722 } 1723 /* scale the diagonal block */ 1724 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1725 1726 if (rr) { 1727 /* Do a scatter end and then right scale the off-diagonal block */ 1728 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1729 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1730 } 1731 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1737 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1738 { 1739 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1740 PetscErrorCode ierr; 1741 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1742 PetscInt i,*owners = A->rmap->range; 1743 PetscInt *nprocs,j,idx,nsends,row; 1744 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1745 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1746 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1747 MPI_Comm comm = ((PetscObject)A)->comm; 1748 MPI_Request *send_waits,*recv_waits; 1749 MPI_Status recv_status,*send_status; 1750 const PetscScalar *xx; 1751 PetscScalar *bb; 1752 #if defined(PETSC_DEBUG) 1753 PetscBool found = PETSC_FALSE; 1754 #endif 1755 1756 PetscFunctionBegin; 1757 /* first count number of contributors to each processor */ 1758 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1759 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1760 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1761 j = 0; 1762 for (i=0; i<N; i++) { 1763 if (lastidx > (idx = rows[i])) j = 0; 1764 lastidx = idx; 1765 for (; j<size; j++) { 1766 if (idx >= owners[j] && idx < owners[j+1]) { 1767 nprocs[2*j]++; 1768 nprocs[2*j+1] = 1; 1769 owner[i] = j; 1770 #if defined(PETSC_DEBUG) 1771 found = PETSC_TRUE; 1772 #endif 1773 break; 1774 } 1775 } 1776 #if defined(PETSC_DEBUG) 1777 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1778 found = PETSC_FALSE; 1779 #endif 1780 } 1781 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1782 1783 if (A->nooffproczerorows) { 1784 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1785 nrecvs = nsends; 1786 nmax = N; 1787 } else { 1788 /* inform other processors of number of messages and max length*/ 1789 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1790 } 1791 1792 /* post receives: */ 1793 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1794 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1795 for (i=0; i<nrecvs; i++) { 1796 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1797 } 1798 1799 /* do sends: 1800 1) starts[i] gives the starting index in svalues for stuff going to 1801 the ith processor 1802 */ 1803 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1804 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1805 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1806 starts[0] = 0; 1807 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1808 for (i=0; i<N; i++) { 1809 svalues[starts[owner[i]]++] = rows[i]; 1810 } 1811 1812 starts[0] = 0; 1813 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1814 count = 0; 1815 for (i=0; i<size; i++) { 1816 if (nprocs[2*i+1]) { 1817 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1818 } 1819 } 1820 ierr = PetscFree(starts);CHKERRQ(ierr); 1821 1822 base = owners[rank]; 1823 1824 /* wait on receives */ 1825 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1826 count = nrecvs; 1827 slen = 0; 1828 while (count) { 1829 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1830 /* unpack receives into our local space */ 1831 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1832 source[imdex] = recv_status.MPI_SOURCE; 1833 lens[imdex] = n; 1834 slen += n; 1835 count--; 1836 } 1837 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1838 1839 /* move the data into the send scatter */ 1840 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1841 count = 0; 1842 for (i=0; i<nrecvs; i++) { 1843 values = rvalues + i*nmax; 1844 for (j=0; j<lens[i]; j++) { 1845 lrows[count++] = values[j] - base; 1846 } 1847 } 1848 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1849 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1850 ierr = PetscFree(owner);CHKERRQ(ierr); 1851 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1852 1853 /* fix right hand side if needed */ 1854 if (x && b) { 1855 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1856 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1857 for (i=0; i<slen; i++) { 1858 bb[lrows[i]] = diag*xx[lrows[i]]; 1859 } 1860 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1861 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1862 } 1863 1864 /* actually zap the local rows */ 1865 /* 1866 Zero the required rows. If the "diagonal block" of the matrix 1867 is square and the user wishes to set the diagonal we use separate 1868 code so that MatSetValues() is not called for each diagonal allocating 1869 new memory, thus calling lots of mallocs and slowing things down. 
1870 1871 */ 1872 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1873 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1875 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1876 } else if (diag != 0.0) { 1877 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1878 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1879 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1880 for (i=0; i<slen; i++) { 1881 row = lrows[i] + rstart_bs; 1882 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1883 } 1884 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1885 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1886 } else { 1887 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1888 } 1889 1890 ierr = PetscFree(lrows);CHKERRQ(ierr); 1891 1892 /* wait on sends */ 1893 if (nsends) { 1894 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1895 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1896 ierr = PetscFree(send_status);CHKERRQ(ierr); 1897 } 1898 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1899 ierr = PetscFree(svalues);CHKERRQ(ierr); 1900 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1906 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1907 { 1908 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1909 PetscErrorCode ierr; 1910 1911 PetscFunctionBegin; 1912 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatEqual_MPIBAIJ" 1920 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1921 { 1922 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1923 Mat a,b,c,d; 1924 PetscBool flg; 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 a = matA->A; b = matA->B; 1929 c = matB->A; d = matB->B; 1930 1931 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1932 if (flg) { 1933 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1934 } 1935 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatCopy_MPIBAIJ" 1941 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1942 { 1943 PetscErrorCode ierr; 1944 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1945 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1946 1947 PetscFunctionBegin; 1948 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
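     When that is the case, or when the nonzero patterns differ, the generic MatCopy_Basic() is used;
     otherwise the diagonal and off-diagonal blocks are copied directly.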
*/ 1949 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1950 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1951 } else { 1952 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1953 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1960 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1961 { 1962 PetscErrorCode ierr; 1963 1964 PetscFunctionBegin; 1965 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1971 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1972 { 1973 PetscErrorCode ierr; 1974 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1975 PetscBLASInt bnz,one=1; 1976 Mat_SeqBAIJ *x,*y; 1977 1978 PetscFunctionBegin; 1979 if (str == SAME_NONZERO_PATTERN) { 1980 PetscScalar alpha = a; 1981 x = (Mat_SeqBAIJ *)xx->A->data; 1982 y = (Mat_SeqBAIJ *)yy->A->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 x = (Mat_SeqBAIJ *)xx->B->data; 1986 y = (Mat_SeqBAIJ *)yy->B->data; 1987 bnz = PetscBLASIntCast(x->nz); 1988 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1989 } else { 1990 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1991 } 1992 PetscFunctionReturn(0); 1993 } 1994 1995 #undef __FUNCT__ 1996 #define __FUNCT__ "MatRealPart_MPIBAIJ" 1997 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1998 { 1999 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2004 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2010 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2011 { 2012 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 2015 PetscFunctionBegin; 2016 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2017 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2023 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2024 { 2025 PetscErrorCode ierr; 2026 IS iscol_local; 2027 PetscInt csize; 2028 2029 PetscFunctionBegin; 2030 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2031 if (call == MAT_REUSE_MATRIX) { 2032 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2033 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2034 } else { 2035 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2036 } 2037 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2038 if (call == MAT_INITIAL_MATRIX) { 2039 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2040 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2045 #undef __FUNCT__ 2046 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2047 /* 2048 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2049 in local and then by concatenating the local matrices the end result. 
2050 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2051 */ 2052 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2053 { 2054 PetscErrorCode ierr; 2055 PetscMPIInt rank,size; 2056 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2057 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2058 Mat M,Mreuse; 2059 MatScalar *vwork,*aa; 2060 MPI_Comm comm = ((PetscObject)mat)->comm; 2061 IS isrow_new, iscol_new; 2062 PetscBool idflag,allrows, allcols; 2063 Mat_SeqBAIJ *aij; 2064 2065 2066 PetscFunctionBegin; 2067 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2068 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2069 /* The compression and expansion should be avoided. Doesn't point 2070 out errors, might change the indices, hence buggey */ 2071 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2072 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2073 2074 /* Check for special case: each processor gets entire matrix columns */ 2075 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2076 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2077 if (idflag && ncol == mat->cmap->N){ 2078 allcols = PETSC_TRUE; 2079 } else { 2080 allcols = PETSC_FALSE; 2081 } 2082 2083 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2084 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2085 if (idflag && nrow == mat->rmap->N){ 2086 allrows = PETSC_TRUE; 2087 } else { 2088 allrows = PETSC_FALSE; 2089 } 2090 if (call == MAT_REUSE_MATRIX) { 2091 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2092 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2093 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2094 } else { 2095 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2096 } 2097 ierr = ISDestroy(&isrow_new); CHKERRQ(ierr); 2098 ierr = ISDestroy(&iscol_new); CHKERRQ(ierr); 2099 /* 2100 m - number of local rows 2101 n - number of columns (same on all processors) 2102 rstart - first row in new global matrix generated 2103 */ 2104 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2105 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2106 m = m/bs; 2107 n = n/bs; 2108 2109 if (call == MAT_INITIAL_MATRIX) { 2110 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2111 ii = aij->i; 2112 jj = aij->j; 2113 2114 /* 2115 Determine the number of non-zeros in the diagonal and off-diagonal 2116 portions of the matrix in order to do correct preallocation 2117 */ 2118 2119 /* first get start and end of "diagonal" columns */ 2120 if (csize == PETSC_DECIDE) { 2121 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2122 if (mglobal == n*bs) { /* square matrix */ 2123 nlocal = m; 2124 } else { 2125 nlocal = n/size + ((n % size) > rank); 2126 } 2127 } else { 2128 nlocal = csize/bs; 2129 } 2130 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2131 rstart = rend - nlocal; 2132 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2133 2134 /* next, compute all the lengths */ 2135 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2136 olens = dlens + m; 2137 for (i=0; 
i<m; i++) { 2138 jend = ii[i+1] - ii[i]; 2139 olen = 0; 2140 dlen = 0; 2141 for (j=0; j<jend; j++) { 2142 if (*jj < rstart || *jj >= rend) olen++; 2143 else dlen++; 2144 jj++; 2145 } 2146 olens[i] = olen; 2147 dlens[i] = dlen; 2148 } 2149 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2150 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2151 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2152 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2153 ierr = PetscFree(dlens);CHKERRQ(ierr); 2154 } else { 2155 PetscInt ml,nl; 2156 2157 M = *newmat; 2158 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2159 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2160 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2161 /* 2162 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2163 rather than the slower MatSetValues(). 2164 */ 2165 M->was_assembled = PETSC_TRUE; 2166 M->assembled = PETSC_FALSE; 2167 } 2168 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2169 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2170 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2171 ii = aij->i; 2172 jj = aij->j; 2173 aa = aij->a; 2174 for (i=0; i<m; i++) { 2175 row = rstart/bs + i; 2176 nz = ii[i+1] - ii[i]; 2177 cwork = jj; jj += nz; 2178 vwork = aa; aa += nz; 2179 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 *newmat = M; 2185 2186 /* save submatrix used in processor for next request */ 2187 if (call == MAT_INITIAL_MATRIX) { 2188 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2189 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2190 } 2191 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatPermute_MPIBAIJ" 2197 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2198 { 2199 MPI_Comm comm,pcomm; 2200 PetscInt first,local_size,nrows; 2201 const PetscInt *rows; 2202 PetscMPIInt size; 2203 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2208 /* make a collective version of 'rowp' */ 2209 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2210 if (pcomm==comm) { 2211 crowp = rowp; 2212 } else { 2213 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2214 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2215 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2216 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2217 } 2218 /* collect the global row permutation and invert it */ 2219 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2220 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2221 if (pcomm!=comm) { 2222 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2223 } 2224 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2225 /* get the local target indices */ 2226 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2227 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2228 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2229 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2230 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2231 ierr 
= ISDestroy(&irowp);CHKERRQ(ierr); 2232 /* the column permutation is so much easier; 2233 make a local version of 'colp' and invert it */ 2234 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2235 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2236 if (size==1) { 2237 lcolp = colp; 2238 } else { 2239 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2240 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2241 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2242 } 2243 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2244 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2245 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2246 if (size>1) { 2247 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2248 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2249 } 2250 /* now we just get the submatrix */ 2251 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2252 /* clean up */ 2253 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2254 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2260 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2261 { 2262 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2263 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2264 2265 PetscFunctionBegin; 2266 if (nghosts) { *nghosts = B->nbs;} 2267 if (ghosts) {*ghosts = baij->garray;} 2268 PetscFunctionReturn(0); 2269 } 2270 2271 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat); 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2275 /* 2276 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2277 */ 2278 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2279 { 2280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2281 PetscErrorCode ierr; 2282 PetscMPIInt size,*ncolsonproc,*disp,nn; 2283 PetscInt bs,i,n,nrows,j,k,m,ncols,col; 2284 const PetscInt *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj; 2285 PetscInt nis = iscoloring->n,nctot,*cols; 2286 PetscInt *rowhit,M,cstart,cend,colb; 2287 PetscInt *columnsforrow,l; 2288 IS *isa; 2289 PetscBool done,flg; 2290 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2291 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2292 2293 PetscFunctionBegin; 2294 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2295 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2296 2297 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2298 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2299 M = mat->rmap->n/bs; 2300 cstart = mat->cmap->rstart/bs; 2301 cend = mat->cmap->rend/bs; 2302 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2303 c->N = mat->cmap->N/bs; 2304 c->m = mat->rmap->n/bs; 2305 c->rstart = mat->rmap->rstart/bs; 2306 2307 c->ncolors = nis; 2308 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2309 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2310 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2311 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2312 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2313 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2314 2315 /* Allow access to data structures of local part of matrix */ 2316 if (!baij->colmap) { 2317 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2318 } 2319 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2320 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2321 2322 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2323 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2324 2325 for (i=0; i<nis; i++) { 2326 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2327 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2328 c->ncolumns[i] = n; 2329 if (n) { 2330 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2331 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2332 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2333 } else { 2334 c->columns[i] = 0; 2335 } 2336 2337 if (ctype == IS_COLORING_GLOBAL){ 2338 /* Determine the total (parallel) number of columns of this color */ 2339 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2340 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2341 2342 nn = PetscMPIIntCast(n); 2343 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2344 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2345 if (!nctot) { 2346 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2347 } 2348 2349 disp[0] = 0; 2350 for (j=1; j<size; j++) { 2351 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2352 } 2353 2354 /* Get complete list of columns for color on each processor */ 2355 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2357 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2358 } else if (ctype == IS_COLORING_GHOSTED){ 2359 /* Determine local number of columns of this color on this process, including ghost points */ 2360 nctot = n; 2361 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2362 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2363 } else { 2364 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2365 } 2366 2367 /* 2368 Mark all rows affect by these columns 2369 */ 2370 /* Temporary option to allow for debugging/testing */ 2371 flg = PETSC_FALSE; 2372 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2373 if (!flg) {/*-----------------------------------------------------------------------------*/ 2374 /* crude, fast version */ 2375 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2376 /* loop over columns*/ 2377 for (j=0; j<nctot; j++) { 2378 if (ctype == IS_COLORING_GHOSTED) { 2379 col = ltog[cols[j]]; 2380 } else { 2381 col = cols[j]; 2382 } 2383 if (col >= cstart && col < cend) { 2384 /* column is in diagonal block of matrix */ 2385 rows = A_cj + A_ci[col-cstart]; 2386 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2387 } else { 2388 #if defined (PETSC_USE_CTABLE) 2389 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2390 colb --; 2391 #else 2392 colb = baij->colmap[col] - 1; 2393 #endif 2394 if (colb == -1) { 2395 m = 0; 2396 } else { 2397 colb = colb/bs; 2398 rows = B_cj + B_ci[colb]; 2399 m = B_ci[colb+1] - B_ci[colb]; 2400 } 2401 } 2402 /* loop over columns marking them in rowhit */ 2403 for (k=0; k<m; k++) { 2404 rowhit[*rows++] = col + 1; 2405 } 2406 } 2407 2408 /* count the number of hits */ 2409 nrows = 0; 2410 for (j=0; j<M; j++) { 2411 if (rowhit[j]) nrows++; 2412 } 2413 c->nrows[i] = nrows; 2414 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2415 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2416 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2417 nrows = 0; 2418 for (j=0; j<M; j++) { 2419 if (rowhit[j]) { 2420 c->rows[i][nrows] = j; 2421 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2422 nrows++; 2423 } 2424 } 2425 } else {/*-------------------------------------------------------------------------------*/ 2426 /* slow version, using rowhit as a linked list */ 2427 PetscInt currentcol,fm,mfm; 2428 rowhit[M] = M; 2429 nrows = 0; 2430 /* loop over columns*/ 2431 for (j=0; j<nctot; j++) { 2432 if (ctype == IS_COLORING_GHOSTED) { 2433 col = ltog[cols[j]]; 2434 } else { 2435 col = cols[j]; 2436 } 2437 if (col >= cstart && col < cend) { 2438 /* column is in diagonal block of matrix */ 2439 rows = A_cj + A_ci[col-cstart]; 2440 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2441 } else { 2442 #if defined (PETSC_USE_CTABLE) 2443 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2444 colb --; 2445 #else 2446 colb = baij->colmap[col] - 1; 2447 #endif 2448 if (colb == -1) { 2449 m = 0; 2450 } else { 2451 colb = colb/bs; 2452 rows = B_cj + B_ci[colb]; 2453 m = B_ci[colb+1] - B_ci[colb]; 2454 } 2455 } 2456 2457 /* loop over columns marking them in rowhit */ 2458 fm = M; /* fm points to first entry in linked list */ 2459 for (k=0; k<m; k++) { 2460 currentcol = *rows++; 2461 /* is it already in the list? 
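             The rowhit[] linked list is kept sorted, so the walk below stops at the first entry that is >= currentcol.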
*/ 2462 do { 2463 mfm = fm; 2464 fm = rowhit[fm]; 2465 } while (fm < currentcol); 2466 /* not in list so add it */ 2467 if (fm != currentcol) { 2468 nrows++; 2469 columnsforrow[currentcol] = col; 2470 /* next three lines insert new entry into linked list */ 2471 rowhit[mfm] = currentcol; 2472 rowhit[currentcol] = fm; 2473 fm = currentcol; 2474 /* fm points to present position in list since we know the columns are sorted */ 2475 } else { 2476 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2477 } 2478 } 2479 } 2480 c->nrows[i] = nrows; 2481 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2482 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2483 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2484 /* now store the linked list of rows into c->rows[i] */ 2485 nrows = 0; 2486 fm = rowhit[M]; 2487 do { 2488 c->rows[i][nrows] = fm; 2489 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2490 fm = rowhit[fm]; 2491 } while (fm < M); 2492 } /* ---------------------------------------------------------------------------------------*/ 2493 ierr = PetscFree(cols);CHKERRQ(ierr); 2494 } 2495 2496 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2497 /* 2498 vscale will contain the "diagonal" on processor scalings followed by the off processor 2499 */ 2500 if (ctype == IS_COLORING_GLOBAL) { 2501 PetscInt *garray; 2502 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2503 for (i=0; i<baij->B->cmap->n/bs; i++) { 2504 for (j=0; j<bs; j++) { 2505 garray[i*bs+j] = bs*baij->garray[i]+j; 2506 } 2507 } 2508 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2509 ierr = PetscFree(garray);CHKERRQ(ierr); 2510 CHKMEMQ; 2511 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2512 for (k=0; k<c->ncolors; k++) { 2513 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2514 for (l=0; l<c->nrows[k]; l++) { 2515 col = c->columnsforrow[k][l]; 2516 if (col >= cstart && col < cend) { 2517 /* column is in diagonal block of matrix */ 2518 colb = col - cstart; 2519 } else { 2520 /* column is in "off-processor" part */ 2521 #if defined (PETSC_USE_CTABLE) 2522 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2523 colb --; 2524 #else 2525 colb = baij->colmap[col] - 1; 2526 #endif 2527 colb = colb/bs; 2528 colb += cend - cstart; 2529 } 2530 c->vscaleforrow[k][l] = colb; 2531 } 2532 } 2533 } else if (ctype == IS_COLORING_GHOSTED) { 2534 /* Get gtol mapping */ 2535 PetscInt N = mat->cmap->N, *gtol; 2536 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2537 for (i=0; i<N; i++) gtol[i] = -1; 2538 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2539 2540 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2541 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2542 for (k=0; k<c->ncolors; k++) { 2543 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2544 for (l=0; l<c->nrows[k]; l++) { 2545 col = c->columnsforrow[k][l]; /* global column index */ 2546 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2547 } 2548 } 2549 ierr = PetscFree(gtol);CHKERRQ(ierr); 2550 } 2551 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2552 2553 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2554 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2555
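  /* done with the compressed column structure of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */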
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2556 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2557 CHKMEMQ; 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2563 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2564 { 2565 Mat B; 2566 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2567 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2568 Mat_SeqAIJ *b; 2569 PetscErrorCode ierr; 2570 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2571 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2572 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2573 2574 PetscFunctionBegin; 2575 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2576 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2577 2578 /* ---------------------------------------------------------------- 2579 Tell every processor the number of nonzeros per row 2580 */ 2581 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2582 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2583 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2584 } 2585 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2586 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2587 displs = recvcounts + size; 2588 for (i=0; i<size; i++) { 2589 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2590 displs[i] = A->rmap->range[i]/bs; 2591 } 2592 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2593 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2594 #else 2595 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2596 #endif 2597 /* --------------------------------------------------------------- 2598 Create the sequential matrix of the same type as the local block diagonal 2599 */ 2600 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2601 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2602 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2603 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2604 b = (Mat_SeqAIJ *)B->data; 2605 2606 /*-------------------------------------------------------------------- 2607 Copy my part of matrix column indices over 2608 */ 2609 sendcount = ad->nz + bd->nz; 2610 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2611 a_jsendbuf = ad->j; 2612 b_jsendbuf = bd->j; 2613 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2614 cnt = 0; 2615 for (i=0; i<n; i++) { 2616 2617 /* put in lower diagonal portion */ 2618 m = bd->i[i+1] - bd->i[i]; 2619 while (m > 0) { 2620 /* is it above diagonal (in bd (compressed) numbering) */ 2621 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2622 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2623 m--; 2624 } 2625 2626 /* put in diagonal portion */ 2627 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2628 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2629 } 2630 2631 /* put in upper diagonal portion */ 2632 while (m-- > 0) { 2633 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2634 } 2635 } 2636 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2637 2638 /*-------------------------------------------------------------------- 2639 Gather all column indices to all processors 2640 */ 2641 for (i=0; i<size; i++) { 2642 recvcounts[i] = 0; 2643 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2644 recvcounts[i] += lens[j]; 2645 } 2646 } 2647 displs[0] = 0; 2648 for (i=1; i<size; i++) { 2649 displs[i] = displs[i-1] + recvcounts[i-1]; 2650 } 2651 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2652 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2653 #else 2654 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2655 #endif 2656 /*-------------------------------------------------------------------- 2657 Assemble the matrix into useable form (note numerical values not yet set) 2658 */ 2659 /* set the b->ilen (length of each row) values */ 2660 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2661 /* set the b->i indices */ 2662 b->i[0] = 0; 2663 for (i=1; i<=A->rmap->N/bs; i++) { 2664 b->i[i] = b->i[i-1] + lens[i-1]; 2665 } 2666 ierr = PetscFree(lens);CHKERRQ(ierr); 2667 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2668 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2670 2671 if (A->symmetric){ 2672 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2673 } else if (A->hermitian) { 2674 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2675 } else if (A->structurally_symmetric) { 2676 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2677 } 2678 *newmat = B; 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatSOR_MPIBAIJ" 2684 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2685 { 2686 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2687 PetscErrorCode ierr; 2688 Vec bb1 = 0; 2689 2690 PetscFunctionBegin; 2691 if (flag == SOR_APPLY_UPPER) { 2692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2693 PetscFunctionReturn(0); 2694 } 2695 2696 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2697 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2698 } 2699 2700 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2701 if (flag & SOR_ZERO_INITIAL_GUESS) { 2702 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2703 its--; 2704 } 2705 2706 while (its--) { 2707 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2708 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 2710 /* update rhs: bb1 = bb - B*x */ 2711 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2712 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2713 2714 /* local sweep */ 2715 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2716 } 2717 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2718 if (flag & SOR_ZERO_INITIAL_GUESS) { 2719 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2720 its--; 2721 } 2722 while (its--) { 2723 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2724 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2725 2726 /* update rhs: bb1 = bb - B*x */ 2727 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2728 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2729 2730 /* local sweep */ 2731 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2732 } 2733 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2734 if (flag & SOR_ZERO_INITIAL_GUESS) { 2735 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2736 its--; 2737 } 2738 while (its--) { 2739 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2740 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2741 2742 /* update rhs: bb1 = bb - B*x */ 2743 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2744 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2745 2746 /* local sweep */ 2747 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2748 } 2749 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2750 2751 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 2755 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2756 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2759 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2760 { 2761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2762 PetscErrorCode ierr; 2763 2764 PetscFunctionBegin; 2765 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 2770 /* -------------------------------------------------------------------*/ 2771 static struct _MatOps MatOps_Values = { 2772 MatSetValues_MPIBAIJ, 2773 MatGetRow_MPIBAIJ, 2774 MatRestoreRow_MPIBAIJ, 2775 MatMult_MPIBAIJ, 2776 /* 4*/ MatMultAdd_MPIBAIJ, 2777 MatMultTranspose_MPIBAIJ, 2778 MatMultTransposeAdd_MPIBAIJ, 2779 0, 2780 0, 2781 0, 2782 /*10*/ 0, 2783 0, 2784 0, 2785 MatSOR_MPIBAIJ, 2786 MatTranspose_MPIBAIJ, 2787 /*15*/ MatGetInfo_MPIBAIJ, 2788 MatEqual_MPIBAIJ, 2789 MatGetDiagonal_MPIBAIJ, 2790 MatDiagonalScale_MPIBAIJ, 2791 MatNorm_MPIBAIJ, 2792 /*20*/ MatAssemblyBegin_MPIBAIJ, 2793 MatAssemblyEnd_MPIBAIJ, 2794 MatSetOption_MPIBAIJ, 2795 MatZeroEntries_MPIBAIJ, 2796 /*24*/ MatZeroRows_MPIBAIJ, 2797 0, 2798 0, 2799 0, 2800 0, 2801 /*29*/ MatSetUp_MPIBAIJ, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*34*/ MatDuplicate_MPIBAIJ, 2807 0, 2808 0, 2809 0, 2810 0, 2811 /*39*/ MatAXPY_MPIBAIJ, 2812 MatGetSubMatrices_MPIBAIJ, 2813 MatIncreaseOverlap_MPIBAIJ, 2814 MatGetValues_MPIBAIJ, 2815 MatCopy_MPIBAIJ, 2816 /*44*/ 0, 2817 MatScale_MPIBAIJ, 2818 0, 2819 0, 2820 0, 2821 /*49*/ 0, 2822 0, 2823 0, 2824 0, 2825 0, 2826 /*54*/ MatFDColoringCreate_MPIBAIJ, 2827 0, 2828 MatSetUnfactored_MPIBAIJ, 2829 MatPermute_MPIBAIJ, 2830 MatSetValuesBlocked_MPIBAIJ, 2831 /*59*/ MatGetSubMatrix_MPIBAIJ, 2832 MatDestroy_MPIBAIJ, 2833 MatView_MPIBAIJ, 2834 0, 2835 0, 2836 /*64*/ 0, 2837 0, 2838 0, 2839 0, 2840 0, 2841 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2842 0, 2843 0, 2844 0, 2845 0, 2846 /*74*/ 0, 2847 MatFDColoringApply_BAIJ, 2848 0, 2849 0, 2850 0, 2851 /*79*/ 0, 2852 0, 2853 0, 2854 0, 2855 MatLoad_MPIBAIJ, 2856 /*84*/ 0, 2857 0, 2858 0, 2859 0, 2860 0, 2861 /*89*/ 0, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*94*/ 0, 2867 0, 2868 0, 2869 0, 2870 0, 2871 
/*99*/ 0, 2872 0, 2873 0, 2874 0, 2875 0, 2876 /*104*/0, 2877 MatRealPart_MPIBAIJ, 2878 MatImaginaryPart_MPIBAIJ, 2879 0, 2880 0, 2881 /*109*/0, 2882 0, 2883 0, 2884 0, 2885 0, 2886 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2887 0, 2888 MatGetGhosts_MPIBAIJ, 2889 0, 2890 0, 2891 /*119*/0, 2892 0, 2893 0, 2894 0, 2895 0, 2896 /*124*/0, 2897 0, 2898 MatInvertBlockDiagonal_MPIBAIJ 2899 }; 2900 2901 EXTERN_C_BEGIN 2902 #undef __FUNCT__ 2903 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2904 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2905 { 2906 PetscFunctionBegin; 2907 *a = ((Mat_MPIBAIJ *)A->data)->A; 2908 PetscFunctionReturn(0); 2909 } 2910 EXTERN_C_END 2911 2912 EXTERN_C_BEGIN 2913 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2914 EXTERN_C_END 2915 2916 EXTERN_C_BEGIN 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2919 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2920 { 2921 PetscInt m,rstart,cstart,cend; 2922 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2923 const PetscInt *JJ=0; 2924 PetscScalar *values=0; 2925 PetscErrorCode ierr; 2926 2927 PetscFunctionBegin; 2928 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2929 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2930 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2931 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2932 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2933 m = B->rmap->n/bs; 2934 rstart = B->rmap->rstart/bs; 2935 cstart = B->cmap->rstart/bs; 2936 cend = B->cmap->rend/bs; 2937 2938 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2939 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2940 for (i=0; i<m; i++) { 2941 nz = ii[i+1] - ii[i]; 2942 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2943 nz_max = PetscMax(nz_max,nz); 2944 JJ = jj + ii[i]; 2945 for (j=0; j<nz; j++) { 2946 if (*JJ >= cstart) break; 2947 JJ++; 2948 } 2949 d = 0; 2950 for (; j<nz; j++) { 2951 if (*JJ++ >= cend) break; 2952 d++; 2953 } 2954 d_nnz[i] = d; 2955 o_nnz[i] = nz - d; 2956 } 2957 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2958 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2959 2960 values = (PetscScalar*)V; 2961 if (!values) { 2962 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2963 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2964 } 2965 for (i=0; i<m; i++) { 2966 PetscInt row = i + rstart; 2967 PetscInt ncols = ii[i+1] - ii[i]; 2968 const PetscInt *icols = jj + ii[i]; 2969 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2970 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2971 } 2972 2973 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2974 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2975 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 EXTERN_C_END 2980 2981 #undef __FUNCT__ 2982 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2983 /*@C 2984 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2985 (the default parallel PETSc format). 
2986 2987 Collective on MPI_Comm 2988 2989 Input Parameters: 2990 + A - the matrix 2991 . bs - the block size 2992 . i - the indices into j for the start of each local row (starts with zero) 2993 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2994 - v - optional values in the matrix 2995 2996 Level: developer 2997 2998 .keywords: matrix, aij, compressed row, sparse, parallel 2999 3000 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 3001 @*/ 3002 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3003 { 3004 PetscErrorCode ierr; 3005 3006 PetscFunctionBegin; 3007 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3008 PetscValidType(B,1); 3009 PetscValidLogicalCollectiveInt(B,bs,2); 3010 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 EXTERN_C_BEGIN 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3017 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3018 { 3019 Mat_MPIBAIJ *b; 3020 PetscErrorCode ierr; 3021 PetscInt i; 3022 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3023 3024 PetscFunctionBegin; 3025 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3026 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3027 3028 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3029 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3030 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3031 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3032 3033 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3034 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3035 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3036 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3037 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3038 3039 if (d_nnz) { 3040 for (i=0; i<B->rmap->n/bs; i++) { 3041 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3042 } 3043 } 3044 if (o_nnz) { 3045 for (i=0; i<B->rmap->n/bs; i++) { 3046 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3047 } 3048 } 3049 3050 b = (Mat_MPIBAIJ*)B->data; 3051 b->bs2 = bs*bs; 3052 b->mbs = B->rmap->n/bs; 3053 b->nbs = B->cmap->n/bs; 3054 b->Mbs = B->rmap->N/bs; 3055 b->Nbs = B->cmap->N/bs; 3056 3057 for (i=0; i<=b->size; i++) { 3058 b->rangebs[i] = B->rmap->range[i]/bs; 3059 } 3060 b->rstartbs = B->rmap->rstart/bs; 3061 b->rendbs = B->rmap->rend/bs; 3062 b->cstartbs = B->cmap->rstart/bs; 3063 b->cendbs = B->cmap->rend/bs; 3064 3065 if (!B->preallocated) { 3066 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3067 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3068 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3069 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3070 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3071 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3072 ierr = 
MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3073 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3074 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3075 } 3076 3077 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3078 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3079 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3080 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3081 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3082 B->preallocated = PETSC_TRUE; 3083 PetscFunctionReturn(0); 3084 } 3085 EXTERN_C_END 3086 3087 EXTERN_C_BEGIN 3088 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3089 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3090 EXTERN_C_END 3091 3092 3093 EXTERN_C_BEGIN 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3096 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3097 { 3098 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3099 PetscErrorCode ierr; 3100 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3101 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3102 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3103 3104 PetscFunctionBegin; 3105 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3106 ii[0] = 0; 3107 CHKMEMQ; 3108 for (i=0; i<M; i++) { 3109 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3110 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3111 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3112 CHKMEMQ; 3113 } 3114 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3115 cnt = 0; 3116 for (i=0; i<M; i++) { 3117 for (j=io[i]; j<io[i+1]; j++) { 3118 if (garray[jo[j]] > rstart) break; 3119 jj[cnt++] = garray[jo[j]]; 3120 CHKMEMQ; 3121 } 3122 for (k=id[i]; k<id[i+1]; k++) { 3123 jj[cnt++] = rstart + jd[k]; 3124 CHKMEMQ; 3125 } 3126 for (;j<io[i+1]; j++) { 3127 jj[cnt++] = garray[jo[j]]; 3128 CHKMEMQ; 3129 } 3130 } 3131 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 EXTERN_C_END 3135 3136 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3137 EXTERN_C_BEGIN 3138 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3139 EXTERN_C_END 3140 3141 EXTERN_C_BEGIN 3142 #undef __FUNCT__ 3143 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3144 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3145 { 3146 PetscErrorCode ierr; 3147 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3148 Mat B; 3149 Mat_MPIAIJ *b; 3150 3151 PetscFunctionBegin; 3152 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3153 3154 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3155 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3156 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3157 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3158 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3159 b = 
(Mat_MPIAIJ*) B->data; 3160 3161 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3162 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3163 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3164 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3165 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3166 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3169 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3170 if (reuse == MAT_REUSE_MATRIX) { 3171 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3172 } else { 3173 *newmat = B; 3174 } 3175 PetscFunctionReturn(0); 3176 } 3177 EXTERN_C_END 3178 3179 EXTERN_C_BEGIN 3180 #if defined(PETSC_HAVE_MUMPS) 3181 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3182 #endif 3183 EXTERN_C_END 3184 3185 /*MC 3186 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3187 3188 Options Database Keys: 3189 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3190 . -mat_block_size <bs> - set the blocksize used to store the matrix 3191 - -mat_use_hash_table <fact> 3192 3193 Level: beginner 3194 3195 .seealso: MatCreateMPIBAIJ 3196 M*/ 3197 3198 EXTERN_C_BEGIN 3199 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3200 EXTERN_C_END 3201 3202 EXTERN_C_BEGIN 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatCreate_MPIBAIJ" 3205 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3206 { 3207 Mat_MPIBAIJ *b; 3208 PetscErrorCode ierr; 3209 PetscBool flg; 3210 3211 PetscFunctionBegin; 3212 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3213 B->data = (void*)b; 3214 3215 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3216 B->assembled = PETSC_FALSE; 3217 3218 B->insertmode = NOT_SET_VALUES; 3219 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3220 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3221 3222 /* build local table of row and column ownerships */ 3223 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3224 3225 /* build cache for off array entries formed */ 3226 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3227 b->donotstash = PETSC_FALSE; 3228 b->colmap = PETSC_NULL; 3229 b->garray = PETSC_NULL; 3230 b->roworiented = PETSC_TRUE; 3231 3232 /* stuff used in block assembly */ 3233 b->barray = 0; 3234 3235 /* stuff used for matrix vector multiply */ 3236 b->lvec = 0; 3237 b->Mvctx = 0; 3238 3239 /* stuff for MatGetRow() */ 3240 b->rowindices = 0; 3241 b->rowvalues = 0; 3242 b->getrowactive = PETSC_FALSE; 3243 3244 /* hash table stuff */ 3245 b->ht = 0; 3246 b->hd = 0; 3247 b->ht_size = 0; 3248 b->ht_flag = PETSC_FALSE; 3249 b->ht_fact = 0; 3250 b->ht_total_ct = 0; 3251 b->ht_insert_ct = 0; 3252 3253 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3254 b->ijonly = PETSC_FALSE; 3255 3256 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3257 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3258 if (flg) { 3259 PetscReal fact = 1.39; 3260 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3261 ierr 
= PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3262 if (fact <= 1.0) fact = 1.39; 3263 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3264 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3265 } 3266 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3267 3268 #if defined(PETSC_HAVE_MUMPS) 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3270 #endif 3271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3272 "MatConvert_MPIBAIJ_MPIAdj", 3273 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3275 "MatConvert_MPIBAIJ_MPIAIJ", 3276 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3278 "MatConvert_MPIBAIJ_MPISBAIJ", 3279 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3281 "MatStoreValues_MPIBAIJ", 3282 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3284 "MatRetrieveValues_MPIBAIJ", 3285 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3287 "MatGetDiagonalBlock_MPIBAIJ", 3288 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3289 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3290 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3291 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3293 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3294 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3296 "MatDiagonalScaleLocal_MPIBAIJ", 3297 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3298 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3299 "MatSetHashTableFactor_MPIBAIJ", 3300 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3301 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3302 "MatConvert_MPIBAIJ_MPIBSTRM", 3303 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3304 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 EXTERN_C_END 3308 3309 /*MC 3310 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3311 3312 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3313 and MATMPIBAIJ otherwise. 3314 3315 Options Database Keys: 3316 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3317 3318 Level: beginner 3319 3320 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3321 M*/ 3322 3323 #undef __FUNCT__ 3324 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3325 /*@C 3326 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3327 (block compressed row). For good matrix assembly performance 3328 the user should preallocate the matrix storage by setting the parameters 3329 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3330 performance can be increased by more than a factor of 50. 3331 3332 Collective on Mat 3333 3334 Input Parameters: 3335 + A - the matrix 3336 . bs - size of block 3337 . d_nz - number of block nonzeros per block row in diagonal portion of local 3338 submatrix (same for all local rows) 3339 . d_nnz - array containing the number of block nonzeros in the various block rows 3340 in the diagonal portion of the local submatrix (possibly different for each block 3341 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3342 set it even if it is zero. 3343 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3344 submatrix (same for all local rows). 3345 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3346 off-diagonal portion of the local submatrix (possibly different for 3347 each block row) or PETSC_NULL. 3348 3349 If the *_nnz parameter is given then the *_nz parameter is ignored 3350 3351 Options Database Keys: 3352 + -mat_block_size - size of the blocks to use 3353 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix 3354 3355 Notes: 3356 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3357 then it must be used on all processors that share the object for that argument. 3358 3359 Storage Information: 3360 For a square global matrix we define each processor's diagonal portion 3361 to be its local rows and the corresponding columns (a square submatrix); 3362 each processor's off-diagonal portion encompasses the remainder of the 3363 local matrix (a rectangular submatrix). 3364 3365 The user can specify preallocated storage for the diagonal part of 3366 the local submatrix with either d_nz or d_nnz (not both). Set 3367 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3368 memory allocation. Likewise, specify preallocated storage for the 3369 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3370 3371 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3372 the figure below we depict these three local rows and all columns (0-11). 3373 3374 .vb 3375 0 1 2 3 4 5 6 7 8 9 10 11 3376 ------------------- 3377 row 3 | o o o d d d o o o o o o 3378 row 4 | o o o d d d o o o o o o 3379 row 5 | o o o d d d o o o o o o 3380 ------------------- 3381 .ve 3382 3383 Thus, any entries in the d locations are stored in the d (diagonal) 3384 submatrix, and any entries in the o locations are stored in the 3385 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3386 stored simply in the MATSEQBAIJ format for compressed row storage. 3387 3388 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3389 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3390 In general, for PDE problems in which most nonzeros are near the diagonal, 3391 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3392 or you will get TERRIBLE performance; see the users' manual chapter on 3393 matrices. 3394 3395 You can call MatGetInfo() to get information on how effective the preallocation was; 3396 for example, see the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3397 You can also run with the option -info and look for messages with the string 3398 malloc in them to see if additional memory allocation was needed.
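
   Example usage (a minimal sketch; the block size, local sizes, and block-nonzero counts below
   are illustrative assumptions only, not values required by this routine). Here each process
   owns 3 block rows with block size 2, the d_nnz/o_nnz arrays give per-block-row counts, and
   the d_nz/o_nz arguments are ignored because the *_nnz arrays are provided:

.vb
   Mat      A;
   PetscInt bs = 2;
   PetscInt d_nnz[3] = {2,1,2};
   PetscInt o_nnz[3] = {1,1,0};

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve

   After the preallocation, values are normally inserted with MatSetValuesBlocked() followed by
   MatAssemblyBegin()/MatAssemblyEnd().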
3399 3400 Level: intermediate 3401 3402 .keywords: matrix, block, aij, compressed row, sparse, parallel 3403 3404 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR() 3405 @*/ 3406 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3407 { 3408 PetscErrorCode ierr; 3409 3410 PetscFunctionBegin; 3411 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3412 PetscValidType(B,1); 3413 PetscValidLogicalCollectiveInt(B,bs,2); 3414 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 3418 #undef __FUNCT__ 3419 #define __FUNCT__ "MatCreateBAIJ" 3420 /*@C 3421 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3422 (block compressed row). For good matrix assembly performance 3423 the user should preallocate the matrix storage by setting the parameters 3424 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3425 performance can be increased by more than a factor of 50. 3426 3427 Collective on MPI_Comm 3428 3429 Input Parameters: 3430 + comm - MPI communicator 3431 . bs - size of block 3432 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3433 This value should be the same as the local size used in creating the 3434 y vector for the matrix-vector product y = Ax. 3435 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3436 This value should be the same as the local size used in creating the 3437 x vector for the matrix-vector product y = Ax. 3438 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3439 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3440 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3441 submatrix (same for all local rows) 3442 . d_nnz - array containing the number of nonzero blocks in the various block rows 3443 in the diagonal portion of the local submatrix (possibly different for each block 3444 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3445 and set it even if it is zero. 3446 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3447 submatrix (same for all local rows). 3448 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3449 off-diagonal portion of the local submatrix (possibly different for 3450 each block row) or PETSC_NULL. 3451 3452 Output Parameter: 3453 . A - the matrix 3454 3455 Options Database Keys: 3456 + -mat_block_size - size of the blocks to use 3457 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix 3458 3459 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3460 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3461 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3462 3463 Notes: 3464 If the *_nnz parameter is given then the *_nz parameter is ignored 3465 3466 A nonzero block is any block that has 1 or more nonzeros in it 3467 3468 The user MUST specify either the local or global matrix dimensions 3469 (possibly both). 3470 3471 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3472 then it must be used on all processors that share the object for that argument.
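
   Example usage (a minimal sketch; the block size, local sizes, and nonzero estimates are
   illustrative assumptions, not values prescribed by this routine). Here each process owns
   10 block rows of a square matrix with block size 3, preallocating at most 5 diagonal and
   2 off-diagonal nonzero blocks per block row:

.vb
   Mat A;

   MatCreateBAIJ(PETSC_COMM_WORLD,3,10*3,10*3,PETSC_DETERMINE,PETSC_DETERMINE,
                 5,PETSC_NULL,2,PETSC_NULL,&A);
.ve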
3473 3474 Storage Information: 3475 For a square global matrix we define each processor's diagonal portion 3476 to be its local rows and the corresponding columns (a square submatrix); 3477 each processor's off-diagonal portion encompasses the remainder of the 3478 local matrix (a rectangular submatrix). 3479 3480 The user can specify preallocated storage for the diagonal part of 3481 the local submatrix with either d_nz or d_nnz (not both). Set 3482 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3483 memory allocation. Likewise, specify preallocated storage for the 3484 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3485 3486 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3487 the figure below we depict these three local rows and all columns (0-11). 3488 3489 .vb 3490 0 1 2 3 4 5 6 7 8 9 10 11 3491 ------------------- 3492 row 3 | o o o d d d o o o o o o 3493 row 4 | o o o d d d o o o o o o 3494 row 5 | o o o d d d o o o o o o 3495 ------------------- 3496 .ve 3497 3498 Thus, any entries in the d locations are stored in the d (diagonal) 3499 submatrix, and any entries in the o locations are stored in the 3500 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3501 stored simply in the MATSEQBAIJ format for compressed row storage. 3502 3503 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3504 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3505 In general, for PDE problems in which most nonzeros are near the diagonal, 3506 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3507 or you will get TERRIBLE performance; see the users' manual chapter on 3508 matrices. 3509 3510 Level: intermediate 3511 3512 .keywords: matrix, block, aij, compressed row, sparse, parallel 3513 3514 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3515 @*/ 3516 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3517 { 3518 PetscErrorCode ierr; 3519 PetscMPIInt size; 3520 3521 PetscFunctionBegin; 3522 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3523 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3525 if (size > 1) { 3526 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3527 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3528 } else { 3529 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3530 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3531 } 3532 PetscFunctionReturn(0); 3533 } 3534 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3537 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3538 { 3539 Mat mat; 3540 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3541 PetscErrorCode ierr; 3542 PetscInt len=0; 3543 3544 PetscFunctionBegin; 3545 *newmat = 0; 3546 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3547 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3548 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3549 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3550 3551 mat->factortype = matin->factortype; 3552 mat->preallocated = PETSC_TRUE; 3553 
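/* the duplicate starts out assembled and not in an insertion mode; the actual values of the sequential blocks are copied below via MatDuplicate() according to cpvalues */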
mat->assembled = PETSC_TRUE; 3554 mat->insertmode = NOT_SET_VALUES; 3555 3556 a = (Mat_MPIBAIJ*)mat->data; 3557 mat->rmap->bs = matin->rmap->bs; 3558 a->bs2 = oldmat->bs2; 3559 a->mbs = oldmat->mbs; 3560 a->nbs = oldmat->nbs; 3561 a->Mbs = oldmat->Mbs; 3562 a->Nbs = oldmat->Nbs; 3563 3564 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3565 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3566 3567 a->size = oldmat->size; 3568 a->rank = oldmat->rank; 3569 a->donotstash = oldmat->donotstash; 3570 a->roworiented = oldmat->roworiented; 3571 a->rowindices = 0; 3572 a->rowvalues = 0; 3573 a->getrowactive = PETSC_FALSE; 3574 a->barray = 0; 3575 a->rstartbs = oldmat->rstartbs; 3576 a->rendbs = oldmat->rendbs; 3577 a->cstartbs = oldmat->cstartbs; 3578 a->cendbs = oldmat->cendbs; 3579 3580 /* hash table stuff */ 3581 a->ht = 0; 3582 a->hd = 0; 3583 a->ht_size = 0; 3584 a->ht_flag = oldmat->ht_flag; 3585 a->ht_fact = oldmat->ht_fact; 3586 a->ht_total_ct = 0; 3587 a->ht_insert_ct = 0; 3588 3589 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3590 if (oldmat->colmap) { 3591 #if defined (PETSC_USE_CTABLE) 3592 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3593 #else 3594 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3595 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3596 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3597 #endif 3598 } else a->colmap = 0; 3599 3600 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3601 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3602 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3603 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3604 } else a->garray = 0; 3605 3606 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3607 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3608 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3609 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3610 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3611 3612 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3613 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3614 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3615 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3616 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3617 *newmat = mat; 3618 3619 PetscFunctionReturn(0); 3620 } 3621 3622 #undef __FUNCT__ 3623 #define __FUNCT__ "MatLoad_MPIBAIJ" 3624 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3625 { 3626 PetscErrorCode ierr; 3627 int fd; 3628 PetscInt i,nz,j,rstart,rend; 3629 PetscScalar *vals,*buf; 3630 MPI_Comm comm = ((PetscObject)viewer)->comm; 3631 MPI_Status status; 3632 PetscMPIInt rank,size,maxnz; 3633 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3634 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3635 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3636 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3637 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3638 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3639 3640 PetscFunctionBegin; 3641 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3642 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3643 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3644 3645 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3646 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3647 if (!rank) { 3648 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3649 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3650 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3651 } 3652 3653 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3654 3655 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3656 M = header[1]; N = header[2]; 3657 3658 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3659 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3660 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3661 3662 /* If global sizes are set, check if they are consistent with that given in the file */ 3663 if (sizesset) { 3664 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3665 } 3666 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3667 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3668 3669 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3670 3671 /* 3672 This code adds extra rows to make sure the number of rows is 3673 divisible by the blocksize 3674 */ 3675 Mbs = M/bs; 3676 extra_rows = bs - M + bs*Mbs; 3677 if (extra_rows == bs) extra_rows = 0; 3678 else Mbs++; 3679 if (extra_rows && !rank) { 3680 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3681 } 3682 3683 /* determine ownership of all rows */ 3684 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3685 mbs = Mbs/size + ((Mbs % size) > rank); 3686 m = mbs*bs; 3687 } else { /* User set */ 3688 m = newmat->rmap->n; 3689 mbs = m/bs; 3690 } 3691 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3692 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3693 3694 /* process 0 needs enough room for process with most rows */ 3695 if (!rank) { 3696 mmax = rowners[1]; 3697 for (i=2; i<=size; i++) { 3698 mmax = PetscMax(mmax,rowners[i]); 3699 } 3700 mmax*=bs; 3701 } else mmax = m; 3702 3703 rowners[0] = 0; 3704 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3705 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3706 rstart = rowners[rank]; 3707 rend = rowners[rank+1]; 3708 3709 /* distribute row lengths to all processors */ 3710 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3711 if (!rank) { 3712 mend = m; 3713 if (size == 1) mend = mend - extra_rows; 3714 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3715 for (j=mend; j<m; j++) locrowlens[j] = 1; 3716 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3717 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3718 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3719 for (j=0; j<m; j++) { 
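/* rows kept on process 0: accumulate its nonzero count */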
3720 procsnz[0] += locrowlens[j]; 3721 } 3722 for (i=1; i<size; i++) { 3723 mend = browners[i+1] - browners[i]; 3724 if (i == size-1) mend = mend - extra_rows; 3725 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3726 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3727 /* calculate the number of nonzeros on each processor */ 3728 for (j=0; j<browners[i+1]-browners[i]; j++) { 3729 procsnz[i] += rowlengths[j]; 3730 } 3731 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3732 } 3733 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3734 } else { 3735 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3736 } 3737 3738 if (!rank) { 3739 /* determine max buffer needed and allocate it */ 3740 maxnz = procsnz[0]; 3741 for (i=1; i<size; i++) { 3742 maxnz = PetscMax(maxnz,procsnz[i]); 3743 } 3744 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3745 3746 /* read in my part of the matrix column indices */ 3747 nz = procsnz[0]; 3748 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3749 mycols = ibuf; 3750 if (size == 1) nz -= extra_rows; 3751 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3752 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3753 3754 /* read in every ones (except the last) and ship off */ 3755 for (i=1; i<size-1; i++) { 3756 nz = procsnz[i]; 3757 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3758 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3759 } 3760 /* read in the stuff for the last proc */ 3761 if (size != 1) { 3762 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3763 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3764 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3765 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3766 } 3767 ierr = PetscFree(cols);CHKERRQ(ierr); 3768 } else { 3769 /* determine buffer space needed for message */ 3770 nz = 0; 3771 for (i=0; i<m; i++) { 3772 nz += locrowlens[i]; 3773 } 3774 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3775 mycols = ibuf; 3776 /* receive message of column indices*/ 3777 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3778 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3779 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3780 } 3781 3782 /* loop over local rows, determining number of off diagonal entries */ 3783 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3784 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3785 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3786 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3787 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3788 rowcount = 0; nzcount = 0; 3789 for (i=0; i<mbs; i++) { 3790 dcount = 0; 3791 odcount = 0; 3792 for (j=0; j<bs; j++) { 3793 kmax = locrowlens[rowcount]; 3794 for (k=0; k<kmax; k++) { 3795 tmp = mycols[nzcount++]/bs; 3796 if (!mask[tmp]) { 3797 mask[tmp] = 1; 3798 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3799 else masked1[dcount++] = tmp; 3800 } 3801 } 3802 rowcount++; 3803 } 3804 3805 dlens[i] = dcount; 3806 odlens[i] = odcount; 3807 3808 /* zero out the mask elements we set */ 3809 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3810 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3811 } 3812 3813 3814 if (!sizesset) { 3815 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3816 } 3817 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3818 3819 if (!rank) { 3820 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3821 /* read in my part of the matrix numerical values */ 3822 nz = procsnz[0]; 3823 vals = buf; 3824 mycols = ibuf; 3825 if (size == 1) nz -= extra_rows; 3826 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3827 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3828 3829 /* insert into matrix */ 3830 jj = rstart*bs; 3831 for (i=0; i<m; i++) { 3832 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3833 mycols += locrowlens[i]; 3834 vals += locrowlens[i]; 3835 jj++; 3836 } 3837 /* read in other processors (except the last one) and ship out */ 3838 for (i=1; i<size-1; i++) { 3839 nz = procsnz[i]; 3840 vals = buf; 3841 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3842 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3843 } 3844 /* the last proc */ 3845 if (size != 1){ 3846 nz = procsnz[i] - extra_rows; 3847 vals = buf; 3848 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3849 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3850 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3851 } 3852 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3853 } else { 3854 /* receive numeric values */ 3855 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3856 3857 /* receive message of values*/ 3858 vals = buf; 3859 mycols = ibuf; 3860 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3861 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3862 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3863 3864 /* insert into matrix */ 3865 jj = rstart*bs; 3866 for (i=0; i<m; i++) { 3867 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3868 mycols += locrowlens[i]; 3869 vals += locrowlens[i]; 3870 jj++; 3871 } 3872 } 3873 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3874 ierr = PetscFree(buf);CHKERRQ(ierr); 3875 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3876 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3877 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3878 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3879 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 3882 PetscFunctionReturn(0); 3883 } 3884 3885 #undef __FUNCT__ 3886 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3887 /*@ 3888 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3889 3890 Input Parameters: 3891 . mat - the matrix 3892 . 
fact - the factor used in computing the size of the hash table 3893 3894 Not Collective, each process can use a different factor 3895 3896 Level: advanced 3897 3898 Notes: 3899 This can also be set by the command line option: -mat_use_hash_table <fact> 3900 3901 .keywords: matrix, hashtable, factor, HT 3902 3903 .seealso: MatSetOption() 3904 @*/ 3905 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3906 { 3907 PetscErrorCode ierr; 3908 3909 PetscFunctionBegin; 3910 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 EXTERN_C_BEGIN 3915 #undef __FUNCT__ 3916 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3917 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3918 { 3919 Mat_MPIBAIJ *baij; 3920 3921 PetscFunctionBegin; 3922 baij = (Mat_MPIBAIJ*)mat->data; 3923 baij->ht_fact = fact; 3924 PetscFunctionReturn(0); 3925 } 3926 EXTERN_C_END 3927 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3930 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3931 { 3932 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3933 PetscFunctionBegin; 3934 *Ad = a->A; 3935 *Ao = a->B; 3936 *colmap = a->garray; 3937 PetscFunctionReturn(0); 3938 } 3939 3940 /* 3941 Special version for direct calls from Fortran (to eliminate two function call overheads) 3942 */ 3943 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3944 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3945 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3946 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3947 #endif 3948 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "matmpibaijsetvaluesblocked" 3951 /*@C 3952 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3953 3954 Collective on Mat 3955 3956 Input Parameters: 3957 + mat - the matrix 3958 . min - number of input rows 3959 . im - input rows 3960 . nin - number of input columns 3961 . in - input columns 3962 . v - numerical values input 3963 - addvin - INSERT_VALUES or ADD_VALUES 3964 3965 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3966 3967 Level: advanced 3968 3969 .seealso: MatSetValuesBlocked() 3970 @*/ 3971 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3972 { 3973 /* convert input arguments to C version */ 3974 Mat mat = *matin; 3975 PetscInt m = *min, n = *nin; 3976 InsertMode addv = *addvin; 3977 3978 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3979 const MatScalar *value; 3980 MatScalar *barray=baij->barray; 3981 PetscBool roworiented = baij->roworiented; 3982 PetscErrorCode ierr; 3983 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3984 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3985 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3986 3987 PetscFunctionBegin; 3988 /* tasks normally handled by MatSetValuesBlocked() */ 3989 if (mat->insertmode == NOT_SET_VALUES) { 3990 mat->insertmode = addv; 3991 } 3992 #if defined(PETSC_USE_DEBUG) 3993 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3994 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3995 #endif 3996 if (mat->assembled) { 3997 mat->was_assembled = PETSC_TRUE; 3998 mat->assembled = PETSC_FALSE; 3999 } 4000 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4001 4002 4003 if (!barray) { 4004 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4005 baij->barray = barray; 4006 } 4007 4008 if (roworiented) { 4009 stepval = (n-1)*bs; 4010 } else { 4011 stepval = (m-1)*bs; 4012 } 4013 for (i=0; i<m; i++) { 4014 if (im[i] < 0) continue; 4015 #if defined(PETSC_USE_DEBUG) 4016 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4017 #endif 4018 if (im[i] >= rstart && im[i] < rend) { 4019 row = im[i] - rstart; 4020 for (j=0; j<n; j++) { 4021 /* If NumCol = 1 then a copy is not required */ 4022 if ((roworiented) && (n == 1)) { 4023 barray = (MatScalar*)v + i*bs2; 4024 } else if ((!roworiented) && (m == 1)) { 4025 barray = (MatScalar*)v + j*bs2; 4026 } else { /* Here a copy is required */ 4027 if (roworiented) { 4028 value = v + i*(stepval+bs)*bs + j*bs; 4029 } else { 4030 value = v + j*(stepval+bs)*bs + i*bs; 4031 } 4032 for (ii=0; ii<bs; ii++,value+=stepval) { 4033 for (jj=0; jj<bs; jj++) { 4034 *barray++ = *value++; 4035 } 4036 } 4037 barray -=bs2; 4038 } 4039 4040 if (in[j] >= cstart && in[j] < cend){ 4041 col = in[j] - cstart; 4042 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4043 } 4044 else if (in[j] < 0) continue; 4045 #if defined(PETSC_USE_DEBUG) 4046 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4047 #endif 4048 else { 4049 if (mat->was_assembled) { 4050 if (!baij->colmap) { 4051 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4052 } 4053 4054 #if defined(PETSC_USE_DEBUG) 4055 #if defined (PETSC_USE_CTABLE) 4056 { PetscInt data; 4057 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4058 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4059 } 4060 #else 4061 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4062 #endif 4063 #endif 4064 #if defined (PETSC_USE_CTABLE) 4065 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4066 col = (col - 1)/bs; 4067 
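/* colmap stores 1-based point-column offsets into B (set up in MatCreateColmap_MPIBAIJ_Private()), so (col - 1)/bs recovers the local block column */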
#else 4068 col = (baij->colmap[in[j]] - 1)/bs; 4069 #endif 4070 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4071 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4072 col = in[j]; 4073 } 4074 } 4075 else col = in[j]; 4076 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4077 } 4078 } 4079 } else { 4080 if (!baij->donotstash) { 4081 if (roworiented) { 4082 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4083 } else { 4084 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4085 } 4086 } 4087 } 4088 } 4089 4090 /* task normally handled by MatSetValuesBlocked() */ 4091 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4092 PetscFunctionReturn(0); 4093 } 4094 4095 #undef __FUNCT__ 4096 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4097 /*@ 4098 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard 4099 CSR format. 4100 4101 Collective on MPI_Comm 4102 4103 Input Parameters: 4104 + comm - MPI communicator 4105 . bs - the block size, only a block size of 1 is supported 4106 . m - number of local rows (Cannot be PETSC_DECIDE) 4107 . n - This value should be the same as the local size used in creating the 4108 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4109 calculated if N is given) For square matrices n is almost always m. 4110 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4111 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4112 . i - row indices 4113 . j - column indices 4114 - a - matrix values 4115 4116 Output Parameter: 4117 . mat - the matrix 4118 4119 Level: intermediate 4120 4121 Notes: 4122 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4123 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4124 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4125 4126 The i and j indices are 0 based, and the entries of i are offsets into the local j array (one per local row). 4127 4128 .keywords: matrix, aij, compressed row, sparse, parallel 4129 4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4131 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4132 @*/ 4133 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4134 { 4135 PetscErrorCode ierr; 4136 4137 4138 PetscFunctionBegin; 4139 if (i[0]) { 4140 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4141 } 4142 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4143 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4144 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4145 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4146 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4147 PetscFunctionReturn(0); 4148 } 4149
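
/*
   Example usage of MatCreateMPIBAIJWithArrays() (a minimal sketch; the CSR data below is an
   illustrative assumption describing 2 local rows with entries in global columns 0 and 1,
   not data taken from any application):

      Mat         A;
      PetscInt    ia[] = {0,2,3};
      PetscInt    ja[] = {0,1,1};
      PetscScalar va[] = {2.0,-1.0,3.0};

      MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);

   As noted in the manual page above, only a block size of 1 is supported, and the arrays are
   copied, so ia, ja, and va may be freed or reused after the call.
*/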