#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
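
     For example (illustrative numbers only): with block size bs = 2 and
   off-diagonal block columns garray = {3,7}, the map sends global block
   column 3 to 1 and global block column 7 to 3, i.e. one plus the local
   point-column offset; a value of zero (or an absent table entry) means
   the global block column does not appear in the local off-diagonal part B.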
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
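
/*
   The *_HT routines below are the hash-table fast path that MatAssemblyEnd_MPIBAIJ()
   installs when MAT_USE_HASH_TABLE has been set: every existing block is keyed by
   key = (block row)*Nbs + (block col) + 1 and located by linear probing starting
   from HASH(size,key,tmp).  Sketch with illustrative numbers only: for Nbs = 10,
   block row 2 and block column 3 give key = 24; HASH() keeps the fractional part
   of key*0.6180339887 scaled by the table size, so for size = 7 the first probe
   slot is (PetscInt)(7*0.8328...) = 5.
*/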
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table entries.
  This table is created only once; if new entries need
  to be added to the matrix then the hash table has to
  be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = baij->size,rank = baij->rank;
  PetscInt       bs = mat->rmap->bs;
  PetscBool      iascii,isdraw;
  PetscViewer    sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
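    /* Binary header written below: header[0..3] = [MAT_FILE_CLASSID, global rows,
       global columns, global number of nonzero scalar entries]; the nonzero count
       is summed onto process 0 by the MPI_Reduce() that follows. */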
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
info->memory; isend[4] += info->mallocs; 1561 if (flag == MAT_LOCAL) { 1562 info->nz_used = isend[0]; 1563 info->nz_allocated = isend[1]; 1564 info->nz_unneeded = isend[2]; 1565 info->memory = isend[3]; 1566 info->mallocs = isend[4]; 1567 } else if (flag == MAT_GLOBAL_MAX) { 1568 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1569 info->nz_used = irecv[0]; 1570 info->nz_allocated = irecv[1]; 1571 info->nz_unneeded = irecv[2]; 1572 info->memory = irecv[3]; 1573 info->mallocs = irecv[4]; 1574 } else if (flag == MAT_GLOBAL_SUM) { 1575 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1576 info->nz_used = irecv[0]; 1577 info->nz_allocated = irecv[1]; 1578 info->nz_unneeded = irecv[2]; 1579 info->memory = irecv[3]; 1580 info->mallocs = irecv[4]; 1581 } else { 1582 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1583 } 1584 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1585 info->fill_ratio_needed = 0; 1586 info->factor_mallocs = 0; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1593 { 1594 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 switch (op) { 1599 case MAT_NEW_NONZERO_LOCATIONS: 1600 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1601 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1602 case MAT_KEEP_NONZERO_PATTERN: 1603 case MAT_NEW_NONZERO_LOCATION_ERR: 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_ROW_ORIENTED: 1608 a->roworiented = flg; 1609 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1611 break; 1612 case MAT_NEW_DIAGONALS: 1613 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1614 break; 1615 case MAT_IGNORE_OFF_PROC_ENTRIES: 1616 a->donotstash = flg; 1617 break; 1618 case MAT_USE_HASH_TABLE: 1619 a->ht_flag = flg; 1620 break; 1621 case MAT_SYMMETRIC: 1622 case MAT_STRUCTURALLY_SYMMETRIC: 1623 case MAT_HERMITIAN: 1624 case MAT_SYMMETRY_ETERNAL: 1625 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1626 break; 1627 default: 1628 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1636 { 1637 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1638 Mat_SeqBAIJ *Aloc; 1639 Mat B; 1640 PetscErrorCode ierr; 1641 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1642 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1643 MatScalar *a; 1644 1645 PetscFunctionBegin; 1646 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1647 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1648 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1649 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1650 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1651 /* Do not know preallocation information, but must set block size */ 1652 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr); 1653 } else { 1654 B = *matout; 1655 } 1656 1657 
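/*
     Build the transpose block row by block row: rvals[] holds the bs global row indices of
     source block row i, which become column indices of the result; each column of every
     bs*bs block is then inserted as one row of the result with MatSetValues_MPIBAIJ().
  */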
/* copy over the A part */ 1658 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1659 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1660 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1661 1662 for (i=0; i<mbs; i++) { 1663 rvals[0] = bs*(baij->rstartbs + i); 1664 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1665 for (j=ai[i]; j<ai[i+1]; j++) { 1666 col = (baij->cstartbs+aj[j])*bs; 1667 for (k=0; k<bs; k++) { 1668 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1669 col++; a += bs; 1670 } 1671 } 1672 } 1673 /* copy over the B part */ 1674 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1675 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1676 for (i=0; i<mbs; i++) { 1677 rvals[0] = bs*(baij->rstartbs + i); 1678 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1679 for (j=ai[i]; j<ai[i+1]; j++) { 1680 col = baij->garray[aj[j]]*bs; 1681 for (k=0; k<bs; k++) { 1682 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1683 col++; a += bs; 1684 } 1685 } 1686 } 1687 ierr = PetscFree(rvals);CHKERRQ(ierr); 1688 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1690 1691 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1692 *matout = B; 1693 } else { 1694 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1701 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1702 { 1703 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1704 Mat a = baij->A,b = baij->B; 1705 PetscErrorCode ierr; 1706 PetscInt s1,s2,s3; 1707 1708 PetscFunctionBegin; 1709 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1710 if (rr) { 1711 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1712 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1713 /* Overlap communication with computation. 
*/ 1714 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 } 1716 if (ll) { 1717 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1718 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1719 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1720 } 1721 /* scale the diagonal block */ 1722 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1723 1724 if (rr) { 1725 /* Do a scatter end and then right scale the off-diagonal block */ 1726 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1727 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1728 } 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1735 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1736 { 1737 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1740 PetscInt i,*owners = A->rmap->range; 1741 PetscInt *nprocs,j,idx,nsends,row; 1742 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1743 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1744 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1745 MPI_Comm comm = ((PetscObject)A)->comm; 1746 MPI_Request *send_waits,*recv_waits; 1747 MPI_Status recv_status,*send_status; 1748 const PetscScalar *xx; 1749 PetscScalar *bb; 1750 #if defined(PETSC_DEBUG) 1751 PetscBool found = PETSC_FALSE; 1752 #endif 1753 1754 PetscFunctionBegin; 1755 /* first count number of contributors to each processor */ 1756 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1757 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1758 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1759 j = 0; 1760 for (i=0; i<N; i++) { 1761 if (lastidx > (idx = rows[i])) j = 0; 1762 lastidx = idx; 1763 for (; j<size; j++) { 1764 if (idx >= owners[j] && idx < owners[j+1]) { 1765 nprocs[2*j]++; 1766 nprocs[2*j+1] = 1; 1767 owner[i] = j; 1768 #if defined(PETSC_DEBUG) 1769 found = PETSC_TRUE; 1770 #endif 1771 break; 1772 } 1773 } 1774 #if defined(PETSC_DEBUG) 1775 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1776 found = PETSC_FALSE; 1777 #endif 1778 } 1779 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1780 1781 if (A->nooffproczerorows) { 1782 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1783 nrecvs = nsends; 1784 nmax = N; 1785 } else { 1786 /* inform other processors of number of messages and max length*/ 1787 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1788 } 1789 1790 /* post receives: */ 1791 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1792 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1793 for (i=0; i<nrecvs; i++) { 1794 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1795 } 1796 1797 /* do sends: 1798 1) starts[i] gives the starting index in svalues for stuff going to 1799 the ith processor 1800 */ 1801 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1802 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1803 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1804 starts[0] = 0; 1805 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1806 for (i=0; i<N; i++) { 1807 svalues[starts[owner[i]]++] = rows[i]; 1808 } 1809 1810 starts[0] = 0; 1811 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1812 count = 0; 1813 for (i=0; i<size; i++) { 1814 if (nprocs[2*i+1]) { 1815 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1816 } 1817 } 1818 ierr = PetscFree(starts);CHKERRQ(ierr); 1819 1820 base = owners[rank]; 1821 1822 /* wait on receives */ 1823 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1824 count = nrecvs; 1825 slen = 0; 1826 while (count) { 1827 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1828 /* unpack receives into our local space */ 1829 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1830 source[imdex] = recv_status.MPI_SOURCE; 1831 lens[imdex] = n; 1832 slen += n; 1833 count--; 1834 } 1835 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1836 1837 /* move the data into the send scatter */ 1838 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1839 count = 0; 1840 for (i=0; i<nrecvs; i++) { 1841 values = rvalues + i*nmax; 1842 for (j=0; j<lens[i]; j++) { 1843 lrows[count++] = values[j] - base; 1844 } 1845 } 1846 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1847 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1848 ierr = PetscFree(owner);CHKERRQ(ierr); 1849 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1850 1851 /* fix right hand side if needed */ 1852 if (x && b) { 1853 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1854 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1855 for (i=0; i<slen; i++) { 1856 bb[lrows[i]] = diag*xx[lrows[i]]; 1857 } 1858 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1859 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1860 } 1861 1862 /* actually zap the local rows */ 1863 /* 1864 Zero the required rows. If the "diagonal block" of the matrix 1865 is square and the user wishes to set the diagonal we use separate 1866 code so that MatSetValues() is not called for each diagonal allocating 1867 new memory, thus calling lots of mallocs and slowing things down. 
1868 1869 */ 1870 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1871 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1872 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1874 } else if (diag != 0.0) { 1875 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1876 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1877 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1878 for (i=0; i<slen; i++) { 1879 row = lrows[i] + rstart_bs; 1880 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1881 } 1882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1884 } else { 1885 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1886 } 1887 1888 ierr = PetscFree(lrows);CHKERRQ(ierr); 1889 1890 /* wait on sends */ 1891 if (nsends) { 1892 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1893 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1894 ierr = PetscFree(send_status);CHKERRQ(ierr); 1895 } 1896 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1897 ierr = PetscFree(svalues);CHKERRQ(ierr); 1898 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1904 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1905 { 1906 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1907 PetscErrorCode ierr; 1908 1909 PetscFunctionBegin; 1910 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatEqual_MPIBAIJ" 1918 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1919 { 1920 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1921 Mat a,b,c,d; 1922 PetscBool flg; 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 a = matA->A; b = matA->B; 1927 c = matB->A; d = matB->B; 1928 1929 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1930 if (flg) { 1931 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1932 } 1933 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatCopy_MPIBAIJ" 1939 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1940 { 1941 PetscErrorCode ierr; 1942 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1943 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1944 1945 PetscFunctionBegin; 1946 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1947 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1948 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1949 } else { 1950 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1951 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1958 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1959 { 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1969 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1970 { 1971 PetscErrorCode ierr; 1972 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1973 PetscBLASInt bnz,one=1; 1974 Mat_SeqBAIJ *x,*y; 1975 1976 PetscFunctionBegin; 1977 if (str == SAME_NONZERO_PATTERN) { 1978 PetscScalar alpha = a; 1979 x = (Mat_SeqBAIJ *)xx->A->data; 1980 y = (Mat_SeqBAIJ *)yy->A->data; 1981 bnz = PetscBLASIntCast(x->nz); 1982 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1983 x = (Mat_SeqBAIJ *)xx->B->data; 1984 y = (Mat_SeqBAIJ *)yy->B->data; 1985 bnz = PetscBLASIntCast(x->nz); 1986 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1987 } else { 1988 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1995 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1996 { 1997 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1998 PetscInt rbs,cbs; 1999 PetscErrorCode ierr; 2000 2001 PetscFunctionBegin; 2002 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2003 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2004 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2005 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2006 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2007 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2014 { 2015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2020 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2026 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2027 { 2028 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2029 PetscErrorCode ierr; 2030 2031 PetscFunctionBegin; 2032 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2033 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2039 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2040 { 2041 PetscErrorCode ierr; 2042 IS iscol_local; 2043 PetscInt csize; 2044 2045 PetscFunctionBegin; 2046 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2047 if (call == MAT_REUSE_MATRIX) { 2048 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2049 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2050 } else { 2051 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2052 } 2053 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2054 if (call == MAT_INITIAL_MATRIX) { 2055 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2056 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 #undef __FUNCT__ 2062 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2063 /* 2064 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2065 in local and then by concatenating the local matrices the end result. 2066 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2067 */ 2068 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2069 { 2070 PetscErrorCode ierr; 2071 PetscMPIInt rank,size; 2072 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2073 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2074 Mat *local,M,Mreuse; 2075 MatScalar *vwork,*aa; 2076 MPI_Comm comm = ((PetscObject)mat)->comm; 2077 Mat_SeqBAIJ *aij; 2078 2079 2080 PetscFunctionBegin; 2081 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2082 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2083 2084 if (call == MAT_REUSE_MATRIX) { 2085 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2086 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2087 local = &Mreuse; 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2089 } else { 2090 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2091 Mreuse = *local; 2092 ierr = PetscFree(local);CHKERRQ(ierr); 2093 } 2094 2095 /* 2096 m - number of local rows 2097 n - number of columns (same on all processors) 2098 rstart - first row in new global matrix generated 2099 */ 2100 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2101 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2102 m = m/bs; 2103 n = n/bs; 2104 2105 if (call == MAT_INITIAL_MATRIX) { 2106 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2107 ii = aij->i; 2108 jj = aij->j; 2109 2110 /* 2111 Determine the number of non-zeros in the diagonal and off-diagonal 2112 portions of the matrix in order to do correct preallocation 2113 */ 2114 2115 /* first get start and end of "diagonal" columns */ 2116 if (csize == PETSC_DECIDE) { 2117 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2118 if (mglobal == n*bs) { /* square matrix */ 2119 nlocal = m; 2120 } else { 2121 nlocal = n/size + ((n % size) > rank); 2122 } 2123 } else { 2124 nlocal = csize/bs; 2125 } 2126 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2127 rstart = rend - nlocal; 2128 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2129 2130 /* next, compute all the lengths */ 2131 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2132 olens = dlens + m; 2133 for (i=0; i<m; i++) { 2134 jend = ii[i+1] - ii[i]; 2135 olen = 0; 2136 dlen = 0; 2137 for (j=0; j<jend; j++) { 2138 if (*jj < rstart || *jj >= rend) olen++; 2139 else dlen++; 2140 jj++; 2141 } 2142 olens[i] = olen; 2143 dlens[i] = dlen; 2144 } 2145 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2146 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2147 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2148 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2149 ierr = PetscFree(dlens);CHKERRQ(ierr); 2150 } else { 2151 PetscInt ml,nl; 2152 2153 M = *newmat; 2154 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2155 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2156 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2157 /* 2158 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2159 rather than the slower MatSetValues(). 2160 */ 2161 M->was_assembled = PETSC_TRUE; 2162 M->assembled = PETSC_FALSE; 2163 } 2164 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2165 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2166 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2167 ii = aij->i; 2168 jj = aij->j; 2169 aa = aij->a; 2170 for (i=0; i<m; i++) { 2171 row = rstart/bs + i; 2172 nz = ii[i+1] - ii[i]; 2173 cwork = jj; jj += nz; 2174 vwork = aa; aa += nz; 2175 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2176 } 2177 2178 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2179 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2180 *newmat = M; 2181 2182 /* save submatrix used in processor for next request */ 2183 if (call == MAT_INITIAL_MATRIX) { 2184 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2185 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2186 } 2187 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatPermute_MPIBAIJ" 2193 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2194 { 2195 MPI_Comm comm,pcomm; 2196 PetscInt first,local_size,nrows; 2197 const PetscInt *rows; 2198 PetscMPIInt size; 2199 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2204 /* make a collective version of 'rowp' */ 2205 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2206 if (pcomm==comm) { 2207 crowp = rowp; 2208 } else { 2209 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2210 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2211 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2212 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2213 } 2214 /* collect the global row permutation and invert it */ 2215 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2216 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2217 if (pcomm!=comm) { 2218 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2219 } 2220 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2221 /* get the local target indices */ 2222 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2223 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2224 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2226 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2227 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2228 /* the column permutation is so much easier; 2229 make a local version of 'colp' and invert it */ 2230 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2231 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2232 if (size==1) { 2233 
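/* colp already lives on a one-process communicator, so it can serve directly as the local column permutation */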
lcolp = colp; 2234 } else { 2235 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2236 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2237 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2238 } 2239 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2240 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2241 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2242 if (size>1) { 2243 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2244 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2245 } 2246 /* now we just get the submatrix */ 2247 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2248 /* clean up */ 2249 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2250 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2256 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2257 { 2258 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2259 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2260 2261 PetscFunctionBegin; 2262 if (nghosts) { *nghosts = B->nbs;} 2263 if (ghosts) {*ghosts = baij->garray;} 2264 PetscFunctionReturn(0); 2265 } 2266 2267 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2271 /* 2272 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2273 */ 2274 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2275 { 2276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2277 PetscErrorCode ierr; 2278 PetscMPIInt size,*ncolsonproc,*disp,nn; 2279 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2280 const PetscInt *is; 2281 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2282 PetscInt *rowhit,M,cstart,cend,colb; 2283 PetscInt *columnsforrow,l; 2284 IS *isa; 2285 PetscBool done,flg; 2286 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2287 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2288 2289 PetscFunctionBegin; 2290 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2291 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2292 2293 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2294 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2295 M = mat->rmap->n/bs; 2296 cstart = mat->cmap->rstart/bs; 2297 cend = mat->cmap->rend/bs; 2298 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2299 c->N = mat->cmap->N/bs; 2300 c->m = mat->rmap->n/bs; 2301 c->rstart = mat->rmap->rstart/bs; 2302 2303 c->ncolors = nis; 2304 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2307 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2308 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2309 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2310 2311 /* Allow access to data structures of local part of matrix */ 2312 if (!baij->colmap) { 2313 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2314 } 2315 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2316 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2317 2318 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2319 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2320 2321 for (i=0; i<nis; i++) { 2322 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2323 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2324 c->ncolumns[i] = n; 2325 if (n) { 2326 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2327 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2328 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2329 } else { 2330 c->columns[i] = 0; 2331 } 2332 2333 if (ctype == IS_COLORING_GLOBAL){ 2334 /* Determine the total (parallel) number of columns of this color */ 2335 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2336 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2337 2338 nn = PetscMPIIntCast(n); 2339 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2340 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2341 if (!nctot) { 2342 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2343 } 2344 2345 disp[0] = 0; 2346 for (j=1; j<size; j++) { 2347 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2348 } 2349 2350 /* Get complete list of columns for color on each processor */ 2351 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2352 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2353 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2354 } else if (ctype == IS_COLORING_GHOSTED){ 2355 /* Determine local number of columns of this color on this process, including ghost points */ 2356 nctot = n; 2357 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2358 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2359 } else { 2360 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2361 } 2362 2363 /* 2364 Mark all rows affected by these columns 2365 */ 2366 /* Temporary option to allow for debugging/testing */ 2367 flg = PETSC_FALSE; 2368 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2369 if (!flg) {/*-----------------------------------------------------------------------------*/ 2370 /* crude, fast version */ 2371 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2372 /* loop over columns*/ 2373 for (j=0; j<nctot; j++) { 2374 if (ctype == IS_COLORING_GHOSTED) { 2375 col = ltog[cols[j]]; 2376 } else { 2377 col = cols[j]; 2378 } 2379 if (col >= cstart && col < cend) { 2380 /* column is in diagonal block of matrix */ 2381 rows = A_cj + A_ci[col-cstart]; 2382 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2383 } else { 2384 #if defined (PETSC_USE_CTABLE) 2385 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2386 colb --; 2387 #else 2388 colb = baij->colmap[col] - 1; 2389 #endif 2390 if (colb == -1) { 2391 m = 0; 2392 } else { 2393 colb = colb/bs; 2394 rows = B_cj + B_ci[colb]; 2395 m = B_ci[colb+1] - B_ci[colb]; 2396 } 2397 } 2398 /* loop over the rows in this column, marking them in rowhit */ 2399 for (k=0; k<m; k++) { 2400 rowhit[*rows++] = col + 1; 2401 } 2402 } 2403 2404 /* count the number of hits */ 2405 nrows = 0; 2406 for (j=0; j<M; j++) { 2407 if (rowhit[j]) nrows++; 2408 } 2409 c->nrows[i] = nrows; 2410 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2411 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2412 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2413 nrows = 0; 2414 for (j=0; j<M; j++) { 2415 if (rowhit[j]) { 2416 c->rows[i][nrows] = j; 2417 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2418 nrows++; 2419 } 2420 } 2421 } else {/*-------------------------------------------------------------------------------*/ 2422 /* slow version, using rowhit as a linked list */ 2423 PetscInt currentcol,fm,mfm; 2424 rowhit[M] = M; 2425 nrows = 0; 2426 /* loop over columns*/ 2427 for (j=0; j<nctot; j++) { 2428 if (ctype == IS_COLORING_GHOSTED) { 2429 col = ltog[cols[j]]; 2430 } else { 2431 col = cols[j]; 2432 } 2433 if (col >= cstart && col < cend) { 2434 /* column is in diagonal block of matrix */ 2435 rows = A_cj + A_ci[col-cstart]; 2436 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2437 } else { 2438 #if defined (PETSC_USE_CTABLE) 2439 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2440 colb --; 2441 #else 2442 colb = baij->colmap[col] - 1; 2443 #endif 2444 if (colb == -1) { 2445 m = 0; 2446 } else { 2447 colb = colb/bs; 2448 rows = B_cj + B_ci[colb]; 2449 m = B_ci[colb+1] - B_ci[colb]; 2450 } 2451 } 2452 2453 /* loop over the rows in this column, marking them in rowhit */ 2454 fm = M; /* fm points to first entry in linked list */ 2455 for (k=0; k<m; k++) { 2456 currentcol = *rows++; 2457 /* is it already in the list?
*/ 2458 do { 2459 mfm = fm; 2460 fm = rowhit[fm]; 2461 } while (fm < currentcol); 2462 /* not in list so add it */ 2463 if (fm != currentcol) { 2464 nrows++; 2465 columnsforrow[currentcol] = col; 2466 /* next three lines insert new entry into linked list */ 2467 rowhit[mfm] = currentcol; 2468 rowhit[currentcol] = fm; 2469 fm = currentcol; 2470 /* fm points to present position in list since we know the columns are sorted */ 2471 } else { 2472 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2473 } 2474 } 2475 } 2476 c->nrows[i] = nrows; 2477 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2478 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2479 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2480 /* now store the linked list of rows into c->rows[i] */ 2481 nrows = 0; 2482 fm = rowhit[M]; 2483 do { 2484 c->rows[i][nrows] = fm; 2485 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2486 fm = rowhit[fm]; 2487 } while (fm < M); 2488 } /* ---------------------------------------------------------------------------------------*/ 2489 ierr = PetscFree(cols);CHKERRQ(ierr); 2490 } 2491 2492 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2493 /* 2494 vscale will contain the "diagonal" on processor scalings followed by the off processor scalings 2495 */ 2496 if (ctype == IS_COLORING_GLOBAL) { 2497 PetscInt *garray; 2498 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2499 for (i=0; i<baij->B->cmap->n/bs; i++) { 2500 for (j=0; j<bs; j++) { 2501 garray[i*bs+j] = bs*baij->garray[i]+j; 2502 } 2503 } 2504 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2505 ierr = PetscFree(garray);CHKERRQ(ierr); 2506 CHKMEMQ; 2507 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2508 for (k=0; k<c->ncolors; k++) { 2509 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2510 for (l=0; l<c->nrows[k]; l++) { 2511 col = c->columnsforrow[k][l]; 2512 if (col >= cstart && col < cend) { 2513 /* column is in diagonal block of matrix */ 2514 colb = col - cstart; 2515 } else { 2516 /* column is in "off-processor" part */ 2517 #if defined (PETSC_USE_CTABLE) 2518 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2519 colb --; 2520 #else 2521 colb = baij->colmap[col] - 1; 2522 #endif 2523 colb = colb/bs; 2524 colb += cend - cstart; 2525 } 2526 c->vscaleforrow[k][l] = colb; 2527 } 2528 } 2529 } else if (ctype == IS_COLORING_GHOSTED) { 2530 /* Get gtol mapping */ 2531 PetscInt N = mat->cmap->N, *gtol; 2532 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2533 for (i=0; i<N; i++) gtol[i] = -1; 2534 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2535 2536 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2537 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2538 for (k=0; k<c->ncolors; k++) { 2539 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2540 for (l=0; l<c->nrows[k]; l++) { 2541 col = c->columnsforrow[k][l]; /* global column index */ 2542 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2543 } 2544 } 2545 ierr = PetscFree(gtol);CHKERRQ(ierr); 2546 } 2547 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2548 2549 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2550 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2551
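/* give back the compressed column structure of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */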
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2552 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2553 CHKMEMQ; 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2560 { 2561 Mat B; 2562 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2563 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2564 Mat_SeqAIJ *b; 2565 PetscErrorCode ierr; 2566 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2567 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2568 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2569 2570 PetscFunctionBegin; 2571 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2572 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2573 2574 /* ---------------------------------------------------------------- 2575 Tell every processor the number of nonzeros per row 2576 */ 2577 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2578 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2579 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2580 } 2581 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2582 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2583 displs = recvcounts + size; 2584 for (i=0; i<size; i++) { 2585 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2586 displs[i] = A->rmap->range[i]/bs; 2587 } 2588 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2589 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #else 2591 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2592 #endif 2593 /* --------------------------------------------------------------- 2594 Create the sequential matrix of the same type as the local block diagonal 2595 */ 2596 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2597 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2598 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2599 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2600 b = (Mat_SeqAIJ *)B->data; 2601 2602 /*-------------------------------------------------------------------- 2603 Copy my part of matrix column indices over 2604 */ 2605 sendcount = ad->nz + bd->nz; 2606 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2607 a_jsendbuf = ad->j; 2608 b_jsendbuf = bd->j; 2609 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2610 cnt = 0; 2611 for (i=0; i<n; i++) { 2612 2613 /* put in lower diagonal portion */ 2614 m = bd->i[i+1] - bd->i[i]; 2615 while (m > 0) { 2616 /* is it above diagonal (in bd (compressed) numbering) */ 2617 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2618 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2619 m--; 2620 } 2621 2622 /* put in diagonal portion */ 2623 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2624 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2625 } 2626 2627 /* put in upper diagonal portion */ 2628 while (m-- > 0) { 2629 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2630 } 2631 } 2632 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2633 2634 /*-------------------------------------------------------------------- 2635 Gather all column indices to all processors 2636 */ 2637 for (i=0; i<size; i++) { 2638 recvcounts[i] = 0; 2639 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2640 recvcounts[i] += lens[j]; 2641 } 2642 } 2643 displs[0] = 0; 2644 for (i=1; i<size; i++) { 2645 displs[i] = displs[i-1] + recvcounts[i-1]; 2646 } 2647 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2648 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #else 2650 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2651 #endif 2652 /*-------------------------------------------------------------------- 2653 Assemble the matrix into useable form (note numerical values not yet set) 2654 */ 2655 /* set the b->ilen (length of each row) values */ 2656 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2657 /* set the b->i indices */ 2658 b->i[0] = 0; 2659 for (i=1; i<=A->rmap->N/bs; i++) { 2660 b->i[i] = b->i[i-1] + lens[i-1]; 2661 } 2662 ierr = PetscFree(lens);CHKERRQ(ierr); 2663 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2666 2667 if (A->symmetric){ 2668 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->hermitian) { 2670 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2671 } else if (A->structurally_symmetric) { 2672 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2673 } 2674 *newmat = B; 2675 PetscFunctionReturn(0); 2676 } 2677 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "MatSOR_MPIBAIJ" 2680 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2681 { 2682 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2683 PetscErrorCode ierr; 2684 Vec bb1 = 0; 2685 2686 PetscFunctionBegin; 2687 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2688 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2689 } 2690 2691 if (flag == SOR_APPLY_UPPER) { 2692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2693 PetscFunctionReturn(0); 2694 } 2695 2696 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2697 if (flag & SOR_ZERO_INITIAL_GUESS) { 2698 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2699 its--; 2700 } 2701 2702 while (its--) { 2703 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 2706 /* update rhs: bb1 = bb - B*x */ 2707 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2708 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2709 2710 /* local sweep */ 2711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2712 } 2713 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2714 if (flag & SOR_ZERO_INITIAL_GUESS) { 2715 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2716 its--; 2717 } 2718 while (its--) { 2719 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2721 2722 /* update rhs: bb1 = bb - B*x */ 2723 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2724 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2725 2726 /* local sweep */ 2727 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2728 } 2729 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2730 if (flag & SOR_ZERO_INITIAL_GUESS) { 2731 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2732 its--; 2733 } 2734 while (its--) { 2735 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2736 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2737 2738 /* update rhs: bb1 = bb - B*x */ 2739 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2740 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2741 2742 /* local sweep */ 2743 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2744 } 2745 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2746 2747 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2755 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values) 2756 { 2757 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2758 PetscErrorCode ierr; 2759 2760 PetscFunctionBegin; 2761 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 2766 /* -------------------------------------------------------------------*/ 2767 static struct _MatOps MatOps_Values = { 2768 MatSetValues_MPIBAIJ, 2769 MatGetRow_MPIBAIJ, 2770 MatRestoreRow_MPIBAIJ, 2771 MatMult_MPIBAIJ, 2772 /* 4*/ MatMultAdd_MPIBAIJ, 2773 MatMultTranspose_MPIBAIJ, 2774 MatMultTransposeAdd_MPIBAIJ, 2775 0, 2776 0, 2777 0, 2778 /*10*/ 0, 2779 0, 2780 0, 2781 MatSOR_MPIBAIJ, 2782 MatTranspose_MPIBAIJ, 2783 /*15*/ MatGetInfo_MPIBAIJ, 2784 MatEqual_MPIBAIJ, 2785 MatGetDiagonal_MPIBAIJ, 2786 MatDiagonalScale_MPIBAIJ, 2787 MatNorm_MPIBAIJ, 2788 /*20*/ MatAssemblyBegin_MPIBAIJ, 2789 MatAssemblyEnd_MPIBAIJ, 2790 MatSetOption_MPIBAIJ, 2791 MatZeroEntries_MPIBAIJ, 2792 /*24*/ MatZeroRows_MPIBAIJ, 2793 0, 2794 0, 2795 0, 2796 0, 2797 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2798 0, 2799 0, 2800 0, 2801 0, 2802 /*34*/ MatDuplicate_MPIBAIJ, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*39*/ MatAXPY_MPIBAIJ, 2808 MatGetSubMatrices_MPIBAIJ, 2809 MatIncreaseOverlap_MPIBAIJ, 2810 MatGetValues_MPIBAIJ, 2811 MatCopy_MPIBAIJ, 2812 /*44*/ 0, 2813 MatScale_MPIBAIJ, 2814 0, 2815 0, 2816 0, 2817 /*49*/ MatSetBlockSize_MPIBAIJ, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*54*/ MatFDColoringCreate_MPIBAIJ, 2823 0, 2824 MatSetUnfactored_MPIBAIJ, 2825 MatPermute_MPIBAIJ, 2826 MatSetValuesBlocked_MPIBAIJ, 2827 /*59*/ MatGetSubMatrix_MPIBAIJ, 2828 MatDestroy_MPIBAIJ, 2829 MatView_MPIBAIJ, 2830 0, 2831 0, 2832 /*64*/ 0, 2833 0, 2834 0, 2835 0, 2836 0, 2837 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*74*/ 0, 2843 MatFDColoringApply_BAIJ, 2844 0, 2845 0, 2846 0, 2847 /*79*/ 0, 2848 0, 2849 0, 2850 0, 2851 MatLoad_MPIBAIJ, 2852 /*84*/ 0, 2853 0, 2854 0, 2855 0, 2856 0, 2857 /*89*/ 0, 2858 0, 2859 0, 2860 0, 2861 0, 2862 /*94*/ 0, 2863 0, 
2864 0, 2865 0, 2866 0, 2867 /*99*/ 0, 2868 0, 2869 0, 2870 0, 2871 0, 2872 /*104*/0, 2873 MatRealPart_MPIBAIJ, 2874 MatImaginaryPart_MPIBAIJ, 2875 0, 2876 0, 2877 /*109*/0, 2878 0, 2879 0, 2880 0, 2881 0, 2882 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2883 0, 2884 MatGetGhosts_MPIBAIJ, 2885 0, 2886 0, 2887 /*119*/0, 2888 0, 2889 0, 2890 0, 2891 0, 2892 /*124*/0, 2893 0, 2894 MatInvertBlockDiagonal_MPIBAIJ 2895 }; 2896 2897 EXTERN_C_BEGIN 2898 #undef __FUNCT__ 2899 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2900 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2901 { 2902 PetscFunctionBegin; 2903 *a = ((Mat_MPIBAIJ *)A->data)->A; 2904 PetscFunctionReturn(0); 2905 } 2906 EXTERN_C_END 2907 2908 EXTERN_C_BEGIN 2909 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2910 EXTERN_C_END 2911 2912 EXTERN_C_BEGIN 2913 #undef __FUNCT__ 2914 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2915 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2916 { 2917 PetscInt m,rstart,cstart,cend; 2918 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2919 const PetscInt *JJ=0; 2920 PetscScalar *values=0; 2921 PetscErrorCode ierr; 2922 2923 PetscFunctionBegin; 2924 2925 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2926 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2927 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2928 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2929 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2930 m = B->rmap->n/bs; 2931 rstart = B->rmap->rstart/bs; 2932 cstart = B->cmap->rstart/bs; 2933 cend = B->cmap->rend/bs; 2934 2935 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2936 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2937 for (i=0; i<m; i++) { 2938 nz = ii[i+1] - ii[i]; 2939 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2940 nz_max = PetscMax(nz_max,nz); 2941 JJ = jj + ii[i]; 2942 for (j=0; j<nz; j++) { 2943 if (*JJ >= cstart) break; 2944 JJ++; 2945 } 2946 d = 0; 2947 for (; j<nz; j++) { 2948 if (*JJ++ >= cend) break; 2949 d++; 2950 } 2951 d_nnz[i] = d; 2952 o_nnz[i] = nz - d; 2953 } 2954 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2955 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2956 2957 values = (PetscScalar*)V; 2958 if (!values) { 2959 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2960 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2961 } 2962 for (i=0; i<m; i++) { 2963 PetscInt row = i + rstart; 2964 PetscInt ncols = ii[i+1] - ii[i]; 2965 const PetscInt *icols = jj + ii[i]; 2966 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2967 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2968 } 2969 2970 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2971 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2974 PetscFunctionReturn(0); 2975 } 2976 EXTERN_C_END 2977 2978 #undef __FUNCT__ 2979 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2980 /*@C 2981 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2982 (block compressed row format). 2983 2984 Collective on MPI_Comm 2985 2986 Input Parameters: 2987 + B - the matrix 2988 . bs - the block size 2989 . i - the indices into j for the start of each local block row (starts with zero) 2990 . j - the block column indices for each local block row (starts with zero); these must be sorted for each row 2991 - v - optional values in the matrix 2992 2993 Level: developer 2994 2995 .keywords: matrix, baij, compressed row, sparse, parallel 2996 2997 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ 2998 @*/ 2999 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3000 { 3001 PetscErrorCode ierr; 3002 3003 PetscFunctionBegin; 3004 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3005 PetscValidType(B,1); 3006 PetscValidLogicalCollectiveInt(B,bs,2); 3007 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3008 PetscFunctionReturn(0); 3009 } 3010 3011 EXTERN_C_BEGIN 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3014 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3015 { 3016 Mat_MPIBAIJ *b; 3017 PetscErrorCode ierr; 3018 PetscInt i, newbs = PetscAbs(bs); 3019 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3020 3021 PetscFunctionBegin; 3022 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3023 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3024 if (bs < 0) { 3025 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3026 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3027 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3028 bs = PetscAbs(bs); 3029 } 3030 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3031 bs = newbs; 3032 3033 3034 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3035 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3036 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3037 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3038 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3039 3040 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3041 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3042 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3043 ierr =
PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3044 3045 if (d_nnz) { 3046 for (i=0; i<B->rmap->n/bs; i++) { 3047 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3048 } 3049 } 3050 if (o_nnz) { 3051 for (i=0; i<B->rmap->n/bs; i++) { 3052 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3053 } 3054 } 3055 3056 b = (Mat_MPIBAIJ*)B->data; 3057 b->bs2 = bs*bs; 3058 b->mbs = B->rmap->n/bs; 3059 b->nbs = B->cmap->n/bs; 3060 b->Mbs = B->rmap->N/bs; 3061 b->Nbs = B->cmap->N/bs; 3062 3063 for (i=0; i<=b->size; i++) { 3064 b->rangebs[i] = B->rmap->range[i]/bs; 3065 } 3066 b->rstartbs = B->rmap->rstart/bs; 3067 b->rendbs = B->rmap->rend/bs; 3068 b->cstartbs = B->cmap->rstart/bs; 3069 b->cendbs = B->cmap->rend/bs; 3070 3071 if (!B->preallocated) { 3072 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3073 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3074 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3075 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3076 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3077 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3078 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3079 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3080 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3081 } 3082 3083 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3084 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3085 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3086 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3087 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3088 B->preallocated = PETSC_TRUE; 3089 PetscFunctionReturn(0); 3090 } 3091 EXTERN_C_END 3092 3093 EXTERN_C_BEGIN 3094 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3095 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3096 EXTERN_C_END 3097 3098 3099 EXTERN_C_BEGIN 3100 #undef __FUNCT__ 3101 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3102 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3103 { 3104 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3105 PetscErrorCode ierr; 3106 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3107 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3108 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3109 3110 PetscFunctionBegin; 3111 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3112 ii[0] = 0; 3113 CHKMEMQ; 3114 for (i=0; i<M; i++) { 3115 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3116 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3117 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3118 /* remove one from count of matrix has diagonal */ 3119 for (j=id[i]; j<id[i+1]; j++) { 3120 if (jd[j] == i) {ii[i+1]--;break;} 3121 } 3122 CHKMEMQ; 3123 } 3124 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3125 cnt = 0; 3126 for (i=0; i<M; i++) { 3127 for (j=io[i]; j<io[i+1]; j++) { 3128 if (garray[jo[j]] > rstart) break; 3129 jj[cnt++] = garray[jo[j]]; 3130 CHKMEMQ; 3131 } 3132 for (k=id[i]; k<id[i+1]; k++) { 3133 if (jd[k] != i) { 3134 jj[cnt++] = rstart + jd[k]; 3135 CHKMEMQ; 3136 } 3137 } 3138 for (;j<io[i+1]; j++) { 3139 jj[cnt++] = garray[jo[j]]; 3140 CHKMEMQ; 3141 } 3142 } 3143 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3144 PetscFunctionReturn(0); 3145 } 3146 EXTERN_C_END 3147 3148 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3149 EXTERN_C_BEGIN 3150 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3151 EXTERN_C_END 3152 3153 EXTERN_C_BEGIN 3154 #undef __FUNCT__ 3155 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3156 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3157 { 3158 PetscErrorCode ierr; 3159 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3160 Mat B; 3161 Mat_MPIAIJ *b; 3162 3163 PetscFunctionBegin; 3164 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3165 3166 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3167 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3168 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3169 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3170 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3171 b = (Mat_MPIAIJ*) B->data; 3172 3173 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3174 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3175 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3176 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3177 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, 
MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3178 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3179 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3180 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3181 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3182 if (reuse == MAT_REUSE_MATRIX) { 3183 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3184 } else { 3185 *newmat = B; 3186 } 3187 PetscFunctionReturn(0); 3188 } 3189 EXTERN_C_END 3190 3191 EXTERN_C_BEGIN 3192 #if defined(PETSC_HAVE_MUMPS) 3193 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3194 #endif 3195 EXTERN_C_END 3196 3197 /*MC 3198 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3199 3200 Options Database Keys: 3201 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3202 . -mat_block_size <bs> - set the blocksize used to store the matrix 3203 - -mat_use_hash_table <fact> 3204 3205 Level: beginner 3206 3207 .seealso: MatCreateMPIBAIJ 3208 M*/ 3209 3210 EXTERN_C_BEGIN 3211 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3212 EXTERN_C_END 3213 3214 EXTERN_C_BEGIN 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatCreate_MPIBAIJ" 3217 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3218 { 3219 Mat_MPIBAIJ *b; 3220 PetscErrorCode ierr; 3221 PetscBool flg; 3222 3223 PetscFunctionBegin; 3224 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3225 B->data = (void*)b; 3226 3227 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3228 B->assembled = PETSC_FALSE; 3229 3230 B->insertmode = NOT_SET_VALUES; 3231 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3232 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3233 3234 /* build local table of row and column ownerships */ 3235 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3236 3237 /* build cache for off array entries formed */ 3238 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3239 b->donotstash = PETSC_FALSE; 3240 b->colmap = PETSC_NULL; 3241 b->garray = PETSC_NULL; 3242 b->roworiented = PETSC_TRUE; 3243 3244 /* stuff used in block assembly */ 3245 b->barray = 0; 3246 3247 /* stuff used for matrix vector multiply */ 3248 b->lvec = 0; 3249 b->Mvctx = 0; 3250 3251 /* stuff for MatGetRow() */ 3252 b->rowindices = 0; 3253 b->rowvalues = 0; 3254 b->getrowactive = PETSC_FALSE; 3255 3256 /* hash table stuff */ 3257 b->ht = 0; 3258 b->hd = 0; 3259 b->ht_size = 0; 3260 b->ht_flag = PETSC_FALSE; 3261 b->ht_fact = 0; 3262 b->ht_total_ct = 0; 3263 b->ht_insert_ct = 0; 3264 3265 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3266 b->ijonly = PETSC_FALSE; 3267 3268 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3269 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3270 if (flg) { 3271 PetscReal fact = 1.39; 3272 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3273 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3274 if (fact <= 1.0) fact = 1.39; 3275 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3276 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3277 
} 3278 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3279 3280 #if defined(PETSC_HAVE_MUMPS) 3281 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3282 #endif 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3284 "MatConvert_MPIBAIJ_MPIAdj", 3285 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3287 "MatConvert_MPIBAIJ_MPIAIJ", 3288 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3289 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3290 "MatConvert_MPIBAIJ_MPISBAIJ", 3291 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3293 "MatStoreValues_MPIBAIJ", 3294 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3296 "MatRetrieveValues_MPIBAIJ", 3297 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3298 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3299 "MatGetDiagonalBlock_MPIBAIJ", 3300 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3301 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3302 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3303 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3304 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3305 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3306 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3307 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3308 "MatDiagonalScaleLocal_MPIBAIJ", 3309 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3310 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3311 "MatSetHashTableFactor_MPIBAIJ", 3312 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3313 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3314 "MatConvert_MPIBAIJ_MPIBSTRM", 3315 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3316 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 EXTERN_C_END 3320 3321 /*MC 3322 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3323 3324 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3325 and MATMPIBAIJ otherwise. 3326 3327 Options Database Keys: 3328 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3329 3330 Level: beginner 3331 3332 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3333 M*/ 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3337 /*@C 3338 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3339 (block compressed row). For good matrix assembly performance 3340 the user should preallocate the matrix storage by setting the parameters 3341 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3342 performance can be increased by more than a factor of 50. 3343 3344 Collective on Mat 3345 3346 Input Parameters: 3347 + A - the matrix 3348 . bs - size of blockk 3349 . 
d_nz - number of block nonzeros per block row in diagonal portion of local 3350 submatrix (same for all local rows) 3351 . d_nnz - array containing the number of block nonzeros in the various block rows 3352 in the diagonal portion of the local submatrix (possibly different for each block 3353 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3354 set it even if it is zero. 3355 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3356 submatrix (same for all local rows). 3357 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3358 off-diagonal portion of the local submatrix (possibly different for 3359 each block row) or PETSC_NULL. 3360 3361 If the *_nnz parameter is given then the *_nz parameter is ignored 3362 3363 Options Database Keys: 3364 + -mat_block_size - size of the blocks to use 3365 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with size factor <fact> 3366 3367 Notes: 3368 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3369 then it must be used on all processors that share the object for that argument. 3370 3371 Storage Information: 3372 For a square global matrix we define each processor's diagonal portion 3373 to be its local rows and the corresponding columns (a square submatrix); 3374 each processor's off-diagonal portion encompasses the remainder of the 3375 local matrix (a rectangular submatrix). 3376 3377 The user can specify preallocated storage for the diagonal part of 3378 the local submatrix with either d_nz or d_nnz (not both). Set 3379 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3380 memory allocation. Likewise, specify preallocated storage for the 3381 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3382 3383 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3384 the figure below we depict these three local rows and all columns (0-11). 3385 3386 .vb 3387 0 1 2 3 4 5 6 7 8 9 10 11 3388 ------------------- 3389 row 3 | o o o d d d o o o o o o 3390 row 4 | o o o d d d o o o o o o 3391 row 5 | o o o d d d o o o o o o 3392 ------------------- 3393 .ve 3394 3395 Thus, any entries in the d locations are stored in the d (diagonal) 3396 submatrix, and any entries in the o locations are stored in the 3397 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3398 stored simply in the MATSEQBAIJ format for compressed row storage. 3399 3400 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3401 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3402 In general, for PDE problems in which most nonzeros are near the diagonal, 3403 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3404 or you will get TERRIBLE performance; see the users' manual chapter on 3405 matrices. 3406 3407 You can call MatGetInfo() to get information on how effective the preallocation was; 3408 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. 3409 You can also run with the option -info and look for messages with the string 3410 malloc in them to see if additional memory allocation was needed.
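As an illustration only (hypothetical sizes: 10 block rows per process, block size 2, and at most 3 diagonal and 1 off-diagonal block per block row; comm is an existing MPI communicator), the intended call sequence is

.vb
   Mat            A;
   PetscErrorCode ierr;
   PetscInt       bs = 2, mloc = 10*bs;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve

The counts 3 and 1 above are placeholders; pass d_nnz and o_nnz arrays instead when the number of blocks differs from block row to block row.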
3411 3412 Level: intermediate 3413 3414 .keywords: matrix, block, aij, compressed row, sparse, parallel 3415 3416 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3417 @*/ 3418 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3419 { 3420 PetscErrorCode ierr; 3421 3422 PetscFunctionBegin; 3423 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3424 PetscValidType(B,1); 3425 PetscValidLogicalCollectiveInt(B,bs,2); 3426 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatCreateMPIBAIJ" 3432 /*@C 3433 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3434 (block compressed row). For good matrix assembly performance 3435 the user should preallocate the matrix storage by setting the parameters 3436 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3437 performance can be increased by more than a factor of 50. 3438 3439 Collective on MPI_Comm 3440 3441 Input Parameters: 3442 + comm - MPI communicator 3443 . bs - size of block 3444 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3445 This value should be the same as the local size used in creating the 3446 y vector for the matrix-vector product y = Ax. 3447 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3448 This value should be the same as the local size used in creating the 3449 x vector for the matrix-vector product y = Ax. 3450 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3451 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3452 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3453 submatrix (same for all local rows) 3454 . d_nnz - array containing the number of nonzero blocks in the various block rows 3455 in the diagonal portion of the local submatrix (possibly different for each block 3456 row) or PETSC_NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3457 and set it even if it is zero. 3458 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3459 submatrix (same for all local rows). 3460 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3461 off-diagonal portion of the local submatrix (possibly different for 3462 each block row) or PETSC_NULL. 3463 3464 Output Parameter: 3465 . A - the matrix 3466 3467 Options Database Keys: 3468 + -mat_block_size - size of the blocks to use 3469 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with size factor <fact> 3470 3471 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3472 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3473 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3474 3475 Notes: 3476 If the *_nnz parameter is given then the *_nz parameter is ignored 3477 3478 A nonzero block is any block that has 1 or more nonzeros in it 3479 3480 The user MUST specify either the local or global matrix dimensions 3481 (possibly both). 3482 3483 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3484 then it must be used on all processors that share the object for that argument.
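As a minimal sketch (hypothetical local size mloc, a multiple of the block size; comm is an existing MPI communicator; at most 3 diagonal and 1 off-diagonal block per block row), a square matrix with block size 2 could be created with

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve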
3485 3486 Storage Information: 3487 For a square global matrix we define each processor's diagonal portion 3488 to be its local rows and the corresponding columns (a square submatrix); 3489 each processor's off-diagonal portion encompasses the remainder of the 3490 local matrix (a rectangular submatrix). 3491 3492 The user can specify preallocated storage for the diagonal part of 3493 the local submatrix with either d_nz or d_nnz (not both). Set 3494 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3495 memory allocation. Likewise, specify preallocated storage for the 3496 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3497 3498 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3499 the figure below we depict these three local rows and all columns (0-11). 3500 3501 .vb 3502 0 1 2 3 4 5 6 7 8 9 10 11 3503 ------------------- 3504 row 3 | o o o d d d o o o o o o 3505 row 4 | o o o d d d o o o o o o 3506 row 5 | o o o d d d o o o o o o 3507 ------------------- 3508 .ve 3509 3510 Thus, any entries in the d locations are stored in the d (diagonal) 3511 submatrix, and any entries in the o locations are stored in the 3512 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3513 stored simply in the MATSEQBAIJ format for compressed row storage. 3514 3515 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3516 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3517 In general, for PDE problems in which most nonzeros are near the diagonal, 3518 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3519 or you will get TERRIBLE performance; see the users' manual chapter on 3520 matrices. 3521 3522 Level: intermediate 3523 3524 .keywords: matrix, block, aij, compressed row, sparse, parallel 3525 3526 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3527 @*/ 3528 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3529 { 3530 PetscErrorCode ierr; 3531 PetscMPIInt size; 3532 3533 PetscFunctionBegin; 3534 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3535 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3537 if (size > 1) { 3538 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3539 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3540 } else { 3541 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3542 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3543 } 3544 PetscFunctionReturn(0); 3545 } 3546 3547 #undef __FUNCT__ 3548 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3549 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3550 { 3551 Mat mat; 3552 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3553 PetscErrorCode ierr; 3554 PetscInt len=0; 3555 3556 PetscFunctionBegin; 3557 *newmat = 0; 3558 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3560 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3561 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3562 3563 mat->factortype = matin->factortype; 3564 mat->preallocated = PETSC_TRUE; 
3565 mat->assembled = PETSC_TRUE; 3566 mat->insertmode = NOT_SET_VALUES; 3567 3568 a = (Mat_MPIBAIJ*)mat->data; 3569 mat->rmap->bs = matin->rmap->bs; 3570 a->bs2 = oldmat->bs2; 3571 a->mbs = oldmat->mbs; 3572 a->nbs = oldmat->nbs; 3573 a->Mbs = oldmat->Mbs; 3574 a->Nbs = oldmat->Nbs; 3575 3576 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3577 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3578 3579 a->size = oldmat->size; 3580 a->rank = oldmat->rank; 3581 a->donotstash = oldmat->donotstash; 3582 a->roworiented = oldmat->roworiented; 3583 a->rowindices = 0; 3584 a->rowvalues = 0; 3585 a->getrowactive = PETSC_FALSE; 3586 a->barray = 0; 3587 a->rstartbs = oldmat->rstartbs; 3588 a->rendbs = oldmat->rendbs; 3589 a->cstartbs = oldmat->cstartbs; 3590 a->cendbs = oldmat->cendbs; 3591 3592 /* hash table stuff */ 3593 a->ht = 0; 3594 a->hd = 0; 3595 a->ht_size = 0; 3596 a->ht_flag = oldmat->ht_flag; 3597 a->ht_fact = oldmat->ht_fact; 3598 a->ht_total_ct = 0; 3599 a->ht_insert_ct = 0; 3600 3601 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3602 if (oldmat->colmap) { 3603 #if defined (PETSC_USE_CTABLE) 3604 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3605 #else 3606 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3607 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3608 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3609 #endif 3610 } else a->colmap = 0; 3611 3612 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3613 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3614 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3615 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3616 } else a->garray = 0; 3617 3618 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3619 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3620 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3621 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3622 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3623 3624 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3625 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3626 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3627 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3628 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3629 *newmat = mat; 3630 3631 PetscFunctionReturn(0); 3632 } 3633 3634 #undef __FUNCT__ 3635 #define __FUNCT__ "MatLoad_MPIBAIJ" 3636 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3637 { 3638 PetscErrorCode ierr; 3639 int fd; 3640 PetscInt i,nz,j,rstart,rend; 3641 PetscScalar *vals,*buf; 3642 MPI_Comm comm = ((PetscObject)viewer)->comm; 3643 MPI_Status status; 3644 PetscMPIInt rank,size,maxnz; 3645 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3646 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3647 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3648 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3649 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3650 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3651 3652 PetscFunctionBegin; 3653 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3654 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3655 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3656 3657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3658 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3659 if (!rank) { 3660 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3661 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3662 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3663 } 3664 3665 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3666 3667 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3668 M = header[1]; N = header[2]; 3669 3670 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3671 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3672 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3673 3674 /* If global sizes are set, check if they are consistent with that given in the file */ 3675 if (sizesset) { 3676 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3677 } 3678 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3679 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3680 3681 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3682 3683 /* 3684 This code adds extra rows to make sure the number of rows is 3685 divisible by the blocksize 3686 */ 3687 Mbs = M/bs; 3688 extra_rows = bs - M + bs*Mbs; 3689 if (extra_rows == bs) extra_rows = 0; 3690 else Mbs++; 3691 if (extra_rows && !rank) { 3692 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3693 } 3694 3695 /* determine ownership of all rows */ 3696 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3697 mbs = Mbs/size + ((Mbs % size) > rank); 3698 m = mbs*bs; 3699 } else { /* User set */ 3700 m = newmat->rmap->n; 3701 mbs = m/bs; 3702 } 3703 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3704 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3705 3706 /* process 0 needs enough room for process with most rows */ 3707 if (!rank) { 3708 mmax = rowners[1]; 3709 for (i=2; i<size; i++) { 3710 mmax = PetscMax(mmax,rowners[i]); 3711 } 3712 mmax*=bs; 3713 } else mmax = m; 3714 3715 rowners[0] = 0; 3716 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3717 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3718 rstart = rowners[rank]; 3719 rend = rowners[rank+1]; 3720 3721 /* distribute row lengths to all processors */ 3722 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3723 if (!rank) { 3724 mend = m; 3725 if (size == 1) mend = mend - extra_rows; 3726 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3727 for (j=mend; j<m; j++) locrowlens[j] = 1; 3728 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3729 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3730 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3731 for (j=0; j<m; j++) { 
3732 procsnz[0] += locrowlens[j]; 3733 } 3734 for (i=1; i<size; i++) { 3735 mend = browners[i+1] - browners[i]; 3736 if (i == size-1) mend = mend - extra_rows; 3737 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3738 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3739 /* calculate the number of nonzeros on each processor */ 3740 for (j=0; j<browners[i+1]-browners[i]; j++) { 3741 procsnz[i] += rowlengths[j]; 3742 } 3743 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3744 } 3745 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3746 } else { 3747 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3748 } 3749 3750 if (!rank) { 3751 /* determine max buffer needed and allocate it */ 3752 maxnz = procsnz[0]; 3753 for (i=1; i<size; i++) { 3754 maxnz = PetscMax(maxnz,procsnz[i]); 3755 } 3756 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3757 3758 /* read in my part of the matrix column indices */ 3759 nz = procsnz[0]; 3760 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3761 mycols = ibuf; 3762 if (size == 1) nz -= extra_rows; 3763 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3764 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3765 3766 /* read in every ones (except the last) and ship off */ 3767 for (i=1; i<size-1; i++) { 3768 nz = procsnz[i]; 3769 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3770 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3771 } 3772 /* read in the stuff for the last proc */ 3773 if (size != 1) { 3774 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3775 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3776 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3777 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3778 } 3779 ierr = PetscFree(cols);CHKERRQ(ierr); 3780 } else { 3781 /* determine buffer space needed for message */ 3782 nz = 0; 3783 for (i=0; i<m; i++) { 3784 nz += locrowlens[i]; 3785 } 3786 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3787 mycols = ibuf; 3788 /* receive message of column indices*/ 3789 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3790 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3791 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3792 } 3793 3794 /* loop over local rows, determining number of off diagonal entries */ 3795 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3796 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3797 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3798 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3799 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3800 rowcount = 0; nzcount = 0; 3801 for (i=0; i<mbs; i++) { 3802 dcount = 0; 3803 odcount = 0; 3804 for (j=0; j<bs; j++) { 3805 kmax = locrowlens[rowcount]; 3806 for (k=0; k<kmax; k++) { 3807 tmp = mycols[nzcount++]/bs; 3808 if (!mask[tmp]) { 3809 mask[tmp] = 1; 3810 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3811 else masked1[dcount++] = tmp; 3812 } 3813 } 3814 rowcount++; 3815 } 3816 3817 dlens[i] = dcount; 3818 odlens[i] = odcount; 3819 3820 /* zero out the mask elements we set */ 3821 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3822 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3823 } 3824 3825 3826 if (!sizesset) { 3827 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3828 } 3829 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3830 3831 if (!rank) { 3832 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3833 /* read in my part of the matrix numerical values */ 3834 nz = procsnz[0]; 3835 vals = buf; 3836 mycols = ibuf; 3837 if (size == 1) nz -= extra_rows; 3838 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3839 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3840 3841 /* insert into matrix */ 3842 jj = rstart*bs; 3843 for (i=0; i<m; i++) { 3844 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3845 mycols += locrowlens[i]; 3846 vals += locrowlens[i]; 3847 jj++; 3848 } 3849 /* read in other processors (except the last one) and ship out */ 3850 for (i=1; i<size-1; i++) { 3851 nz = procsnz[i]; 3852 vals = buf; 3853 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3854 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3855 } 3856 /* the last proc */ 3857 if (size != 1){ 3858 nz = procsnz[i] - extra_rows; 3859 vals = buf; 3860 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3861 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3862 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3863 } 3864 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3865 } else { 3866 /* receive numeric values */ 3867 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3868 3869 /* receive message of values*/ 3870 vals = buf; 3871 mycols = ibuf; 3872 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3873 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3874 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3875 3876 /* insert into matrix */ 3877 jj = rstart*bs; 3878 for (i=0; i<m; i++) { 3879 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3880 mycols += locrowlens[i]; 3881 vals += locrowlens[i]; 3882 jj++; 3883 } 3884 } 3885 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3886 ierr = PetscFree(buf);CHKERRQ(ierr); 3887 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3888 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3889 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3890 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3891 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 3894 PetscFunctionReturn(0); 3895 } 3896 3897 #undef __FUNCT__ 3898 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3899 /*@ 3900 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3901 3902 Input Parameters: 3903 . mat - the matrix 3904 . 
fact - factor 3905 3906 Not Collective, each process can use a different factor 3907 3908 Level: advanced 3909 3910 Notes: 3911 This can also be set by the command line option: -mat_use_hash_table <fact> 3912 3913 .keywords: matrix, hashtable, factor, HT 3914 3915 .seealso: MatSetOption() 3916 @*/ 3917 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3918 { 3919 PetscErrorCode ierr; 3920 3921 PetscFunctionBegin; 3922 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 EXTERN_C_BEGIN 3927 #undef __FUNCT__ 3928 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3929 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3930 { 3931 Mat_MPIBAIJ *baij; 3932 3933 PetscFunctionBegin; 3934 baij = (Mat_MPIBAIJ*)mat->data; 3935 baij->ht_fact = fact; 3936 PetscFunctionReturn(0); 3937 } 3938 EXTERN_C_END 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3942 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3943 { 3944 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3945 PetscFunctionBegin; 3946 *Ad = a->A; 3947 *Ao = a->B; 3948 *colmap = a->garray; 3949 PetscFunctionReturn(0); 3950 } 3951 3952 /* 3953 Special version for direct calls from Fortran (to eliminate two function call overheads) 3954 */ 3955 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3956 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3957 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3958 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3959 #endif 3960 3961 #undef __FUNCT__ 3962 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3963 /*@C 3964 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked() 3965 3966 Collective on Mat 3967 3968 Input Parameters: 3969 + mat - the matrix 3970 . min - number of input rows 3971 . im - input rows 3972 . nin - number of input columns 3973 . in - input columns 3974 . v - numerical values input 3975 - addvin - INSERT_VALUES or ADD_VALUES 3976 3977 Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3978 3979 Level: advanced 3980 3981 .seealso: MatSetValuesBlocked() 3982 @*/ 3983 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3984 { 3985 /* convert input arguments to C version */ 3986 Mat mat = *matin; 3987 PetscInt m = *min, n = *nin; 3988 InsertMode addv = *addvin; 3989 3990 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3991 const MatScalar *value; 3992 MatScalar *barray=baij->barray; 3993 PetscBool roworiented = baij->roworiented; 3994 PetscErrorCode ierr; 3995 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3996 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3997 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3998 3999 PetscFunctionBegin; 4000 /* tasks normally handled by MatSetValuesBlocked() */ 4001 if (mat->insertmode == NOT_SET_VALUES) { 4002 mat->insertmode = addv; 4003 } 4004 #if defined(PETSC_USE_DEBUG) 4005 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4006 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 4007 #endif 4008 if (mat->assembled) { 4009 mat->was_assembled = PETSC_TRUE; 4010 mat->assembled = PETSC_FALSE; 4011 } 4012 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4013 4014 4015 if(!barray) { 4016 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4017 baij->barray = barray; 4018 } 4019 4020 if (roworiented) { 4021 stepval = (n-1)*bs; 4022 } else { 4023 stepval = (m-1)*bs; 4024 } 4025 for (i=0; i<m; i++) { 4026 if (im[i] < 0) continue; 4027 #if defined(PETSC_USE_DEBUG) 4028 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4029 #endif 4030 if (im[i] >= rstart && im[i] < rend) { 4031 row = im[i] - rstart; 4032 for (j=0; j<n; j++) { 4033 /* If NumCol = 1 then a copy is not required */ 4034 if ((roworiented) && (n == 1)) { 4035 barray = (MatScalar*)v + i*bs2; 4036 } else if((!roworiented) && (m == 1)) { 4037 barray = (MatScalar*)v + j*bs2; 4038 } else { /* Here a copy is required */ 4039 if (roworiented) { 4040 value = v + i*(stepval+bs)*bs + j*bs; 4041 } else { 4042 value = v + j*(stepval+bs)*bs + i*bs; 4043 } 4044 for (ii=0; ii<bs; ii++,value+=stepval) { 4045 for (jj=0; jj<bs; jj++) { 4046 *barray++ = *value++; 4047 } 4048 } 4049 barray -=bs2; 4050 } 4051 4052 if (in[j] >= cstart && in[j] < cend){ 4053 col = in[j] - cstart; 4054 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4055 } 4056 else if (in[j] < 0) continue; 4057 #if defined(PETSC_USE_DEBUG) 4058 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4059 #endif 4060 else { 4061 if (mat->was_assembled) { 4062 if (!baij->colmap) { 4063 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4064 } 4065 4066 #if defined(PETSC_USE_DEBUG) 4067 #if defined (PETSC_USE_CTABLE) 4068 { PetscInt data; 4069 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4070 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4071 } 4072 #else 4073 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4074 #endif 4075 #endif 4076 #if defined (PETSC_USE_CTABLE) 4077 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4078 col = (col - 1)/bs; 4079 #else 
4080 col = (baij->colmap[in[j]] - 1)/bs; 4081 #endif 4082 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4083 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4084 col = in[j]; 4085 } 4086 } 4087 else col = in[j]; 4088 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4089 } 4090 } 4091 } else { 4092 if (!baij->donotstash) { 4093 if (roworiented) { 4094 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4095 } else { 4096 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4097 } 4098 } 4099 } 4100 } 4101 4102 /* task normally handled by MatSetValuesBlocked() */ 4103 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4104 PetscFunctionReturn(0); 4105 } 4106 4107 #undef __FUNCT__ 4108 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4109 /*@ 4110 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 4111 in standard CSR format. 4112 4113 Collective on MPI_Comm 4114 4115 Input Parameters: 4116 + comm - MPI communicator 4117 . bs - the block size, only a block size of 1 is supported 4118 . m - number of local rows (Cannot be PETSC_DECIDE) 4119 . n - This value should be the same as the local size used in creating the 4120 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4121 calculated if N is given) For square matrices n is almost always m. 4122 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4123 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4124 . i - row indices 4125 . j - column indices 4126 - a - matrix values 4127 4128 Output Parameter: 4129 . mat - the matrix 4130 4131 Level: intermediate 4132 4133 Notes: 4134 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4135 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4136 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4137 4138 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 4139 4140 .keywords: matrix, aij, compressed row, sparse, parallel 4141 4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4143 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4144 @*/ 4145 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4146 { 4147 PetscErrorCode ierr; 4148 4149 4150 PetscFunctionBegin; 4151 if (i[0]) { 4152 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4153 } 4154 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4155 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4156 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4157 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4158 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4159 PetscFunctionReturn(0); 4160 } 4161
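/*
   Example (a minimal sketch, not part of the library): building a matrix with
   block size 1 from locally owned CSR arrays via MatCreateMPIBAIJWithArrays().
   The arrays ii, jj, vv below are hypothetical; they must describe this
   process's rows with 0-based indices and ii[0] = 0.

      PetscInt       ii[] = {0,2,3};           row pointers: 2 nonzeros in row 0, 1 in row 1
      PetscInt       jj[] = {0,1,1};           global column indices
      PetscScalar    vv[] = {1.0,2.0,3.0};
      Mat            C;
      PetscErrorCode ierr;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);

   The arrays are copied, so ii, jj, and vv may be freed or modified afterwards
   without affecting C.
*/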