1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = 
PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 
PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 
374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscBool roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt 
rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = PetscSqrtReal(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == 
NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 #if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = 
%5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. 
Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. */ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscBool iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? 
*/ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1018 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1019 1020 /* copy over the A part */ 1021 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1022 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1023 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1024 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = (baij->cstartbs+aj[j])*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 /* copy over the B part */ 1037 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1038 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1039 for (i=0; i<mbs; i++) { 1040 rvals[0] = bs*(baij->rstartbs + i); 1041 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1042 for (j=ai[i]; j<ai[i+1]; j++) { 1043 col = baij->garray[aj[j]]*bs; 1044 for (k=0; k<bs; k++) { 1045 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1046 col++; a += bs; 1047 } 1048 } 1049 } 1050 ierr = PetscFree(rvals);CHKERRQ(ierr); 1051 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1053 /* 1054 Everyone has to call to draw the matrix since the graphics waits are 1055 synchronized across all processors that share the PetscDraw object 1056 */ 1057 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1058 if (!rank) { 1059 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1060 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1061 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1062 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1063 } 1064 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1065 ierr = MatDestroy(&A);CHKERRQ(ierr); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1073 { 1074 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1075 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1076 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1077 PetscErrorCode ierr; 1078 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1079 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1080 int fd; 1081 PetscScalar *column_values; 1082 FILE *file; 1083 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1084 PetscInt message_count,flowcontrolcount; 1085 1086 PetscFunctionBegin; 1087 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1088 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1089 nz = bs2*(A->nz + B->nz); 1090 rlen = mat->rmap->n; 1091 if (!rank) { 1092 header[0] = MAT_FILE_CLASSID; 1093 header[1] = mat->rmap->N; 
1094 header[2] = mat->cmap->N; 1095 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1096 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1097 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1098 /* get largest number of rows any processor has */ 1099 range = mat->rmap->range; 1100 for (i=1; i<size; i++) { 1101 rlen = PetscMax(rlen,range[i+1] - range[i]); 1102 } 1103 } else { 1104 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1105 } 1106 1107 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1108 /* compute lengths of each row */ 1109 for (i=0; i<a->mbs; i++) { 1110 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1111 } 1112 /* store the row lengths to the file */ 1113 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1114 if (!rank) { 1115 MPI_Status status; 1116 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1117 rlen = (range[1] - range[0])/bs; 1118 for (i=0; i<rlen; i++) { 1119 for (j=0; j<bs; j++) { 1120 row_lens[i*bs+j] = bs*crow_lens[i]; 1121 } 1122 } 1123 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1124 for (i=1; i<size; i++) { 1125 rlen = (range[i+1] - range[i])/bs; 1126 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1127 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1128 for (k=0; k<rlen; k++) { 1129 for (j=0; j<bs; j++) { 1130 row_lens[k*bs+j] = bs*crow_lens[k]; 1131 } 1132 } 1133 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1134 } 1135 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1136 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1139 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1140 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1141 } 1142 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1143 1144 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1145 information needed to make it for each row from a block row. 
This does require more communication but still not more than 1146 the communication needed for the nonzero values */ 1147 nzmax = nz; /* space a largest processor needs */ 1148 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1149 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1150 cnt = 0; 1151 for (i=0; i<a->mbs; i++) { 1152 pcnt = cnt; 1153 for (j=B->i[i]; j<B->i[i+1]; j++) { 1154 if ( (col = garray[B->j[j]]) > cstart) break; 1155 for (l=0; l<bs; l++) { 1156 column_indices[cnt++] = bs*col+l; 1157 } 1158 } 1159 for (k=A->i[i]; k<A->i[i+1]; k++) { 1160 for (l=0; l<bs; l++) { 1161 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1162 } 1163 } 1164 for (; j<B->i[i+1]; j++) { 1165 for (l=0; l<bs; l++) { 1166 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1167 } 1168 } 1169 len = cnt - pcnt; 1170 for (k=1; k<bs; k++) { 1171 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1172 cnt += len; 1173 } 1174 } 1175 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1176 1177 /* store the columns to the file */ 1178 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1179 if (!rank) { 1180 MPI_Status status; 1181 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1182 for (i=1; i<size; i++) { 1183 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1184 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1186 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1187 } 1188 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1189 } else { 1190 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1191 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1193 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1194 } 1195 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1196 1197 /* load up the numerical values */ 1198 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1199 cnt = 0; 1200 for (i=0; i<a->mbs; i++) { 1201 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1202 for (j=B->i[i]; j<B->i[i+1]; j++) { 1203 if ( garray[B->j[j]] > cstart) break; 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 for (k=A->i[i]; k<A->i[i+1]; k++) { 1212 for (l=0; l<bs; l++) { 1213 for (ll=0; ll<bs; ll++) { 1214 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1215 } 1216 } 1217 cnt += bs; 1218 } 1219 for (; j<B->i[i+1]; j++) { 1220 for (l=0; l<bs; l++) { 1221 for (ll=0; ll<bs; ll++) { 1222 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1223 } 1224 } 1225 cnt += bs; 1226 } 1227 cnt += (bs-1)*rlen; 1228 } 1229 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1230 1231 /* store the column values to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 MPI_Status status; 1235 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1236 for (i=1; i<size; i++) { 1237 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1238 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1240 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1241 } 1242 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1243 } else { 1244 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1245 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1247 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1248 } 1249 ierr = PetscFree(column_values);CHKERRQ(ierr); 1250 1251 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1252 if (file) { 1253 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatView_MPIBAIJ" 1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1261 { 1262 PetscErrorCode ierr; 1263 PetscBool iascii,isdraw,issocket,isbinary; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1269 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1270 if (iascii || isdraw || issocket) { 1271 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1272 } else if (isbinary) { 1273 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1274 } else { 1275 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1283 { 1284 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 #if defined(PETSC_USE_LOG) 1289 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1290 #endif 1291 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1292 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1294 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1295 #if defined (PETSC_USE_CTABLE) 1296 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1297 #else 1298 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1299 #endif 1300 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1301 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1302 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1303 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1304 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1305 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1306 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1307 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1308 1309 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1310 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatMult_MPIBAIJ" 1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1325 { 1326 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 PetscInt nt; 1329 1330 PetscFunctionBegin; 1331 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1332 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1333 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1334 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1335 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1336 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1344 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1345 { 1346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1352 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1353 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1359 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1360 { 1361 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1362 PetscErrorCode ierr; 1363 PetscBool merged; 1364 1365 PetscFunctionBegin; 1366 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1367 /* do nondiagonal part */ 1368 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1369 if (!merged) { 1370 /* send it on its way */ 1371 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1372 /* do local part */ 1373 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1374 /* receive remote parts: note this assumes the values are not actually */ 1375 /* inserted in yy until the next line */ 1376 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377 } else { 1378 /* do local part */ 1379 ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 /* values actually were received in the Begin() but we need to call this nop */ 1383 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1390 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1391 { 1392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 /* do nondiagonal part */ 1397 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1398 /* send it on its way */ 1399 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1400 /* do local part */ 1401 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1402 /* receive remote parts: note this assumes the values are not actually */ 1403 /* inserted in yy until the next line, which is true for my implementation*/ 1404 /* but is not perhaps always true. */ 1405 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 /* 1410 This only works correctly for square matrices where the subblock A->A is the 1411 diagonal block 1412 */ 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1416 { 1417 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1422 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatScale_MPIBAIJ" 1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1429 { 1430 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1431 PetscErrorCode ierr; 1432 1433 PetscFunctionBegin; 1434 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1435 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1441 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1442 { 1443 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1444 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1445 PetscErrorCode ierr; 1446 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1447 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1448 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1449 1450 PetscFunctionBegin; 1451 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1452 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1453 mat->getrowactive = PETSC_TRUE; 1454 1455 if (!mat->rowvalues && (idx || v)) { 1456 /* 1457 allocate enough space to hold information from the longest row. 
1458 */ 1459 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1460 PetscInt max = 1,mbs = mat->mbs,tmp; 1461 for (i=0; i<mbs; i++) { 1462 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1463 if (max < tmp) { max = tmp; } 1464 } 1465 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1466 } 1467 lrow = row - brstart; 1468 1469 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1470 if (!v) {pvA = 0; pvB = 0;} 1471 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1472 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1473 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1474 nztot = nzA + nzB; 1475 1476 cmap = mat->garray; 1477 if (v || idx) { 1478 if (nztot) { 1479 /* Sort by increasing column numbers, assuming A and B already sorted */ 1480 PetscInt imark = -1; 1481 if (v) { 1482 *v = v_p = mat->rowvalues; 1483 for (i=0; i<nzB; i++) { 1484 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1485 else break; 1486 } 1487 imark = i; 1488 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1489 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1490 } 1491 if (idx) { 1492 *idx = idx_p = mat->rowindices; 1493 if (imark > -1) { 1494 for (i=0; i<imark; i++) { 1495 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1496 } 1497 } else { 1498 for (i=0; i<nzB; i++) { 1499 if (cmap[cworkB[i]/bs] < cstart) 1500 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1501 else break; 1502 } 1503 imark = i; 1504 } 1505 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1506 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1507 } 1508 } else { 1509 if (idx) *idx = 0; 1510 if (v) *v = 0; 1511 } 1512 } 1513 *nz = nztot; 1514 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1515 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1522 { 1523 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1524 1525 PetscFunctionBegin; 1526 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1527 baij->getrowactive = PETSC_FALSE; 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1534 { 1535 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1536 PetscErrorCode ierr; 1537 1538 PetscFunctionBegin; 1539 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1540 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1547 { 1548 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1549 Mat A = a->A,B = a->B; 1550 PetscErrorCode ierr; 1551 PetscReal isend[5],irecv[5]; 1552 1553 PetscFunctionBegin; 1554 info->block_size = (PetscReal)matin->rmap->bs; 1555 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1556 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1557 isend[3] = info->memory; isend[4] = info->mallocs; 1558 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1559 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1560 isend[3] += 
info->memory; isend[4] += info->mallocs; 1561 if (flag == MAT_LOCAL) { 1562 info->nz_used = isend[0]; 1563 info->nz_allocated = isend[1]; 1564 info->nz_unneeded = isend[2]; 1565 info->memory = isend[3]; 1566 info->mallocs = isend[4]; 1567 } else if (flag == MAT_GLOBAL_MAX) { 1568 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1569 info->nz_used = irecv[0]; 1570 info->nz_allocated = irecv[1]; 1571 info->nz_unneeded = irecv[2]; 1572 info->memory = irecv[3]; 1573 info->mallocs = irecv[4]; 1574 } else if (flag == MAT_GLOBAL_SUM) { 1575 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1576 info->nz_used = irecv[0]; 1577 info->nz_allocated = irecv[1]; 1578 info->nz_unneeded = irecv[2]; 1579 info->memory = irecv[3]; 1580 info->mallocs = irecv[4]; 1581 } else { 1582 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1583 } 1584 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1585 info->fill_ratio_needed = 0; 1586 info->factor_mallocs = 0; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1593 { 1594 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 switch (op) { 1599 case MAT_NEW_NONZERO_LOCATIONS: 1600 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1601 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1602 case MAT_KEEP_NONZERO_PATTERN: 1603 case MAT_NEW_NONZERO_LOCATION_ERR: 1604 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1605 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1606 break; 1607 case MAT_ROW_ORIENTED: 1608 a->roworiented = flg; 1609 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1610 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1611 break; 1612 case MAT_NEW_DIAGONALS: 1613 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1614 break; 1615 case MAT_IGNORE_OFF_PROC_ENTRIES: 1616 a->donotstash = flg; 1617 break; 1618 case MAT_USE_HASH_TABLE: 1619 a->ht_flag = flg; 1620 break; 1621 case MAT_SYMMETRIC: 1622 case MAT_STRUCTURALLY_SYMMETRIC: 1623 case MAT_HERMITIAN: 1624 case MAT_SYMMETRY_ETERNAL: 1625 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1626 break; 1627 default: 1628 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1636 { 1637 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1638 Mat_SeqBAIJ *Aloc; 1639 Mat B; 1640 PetscErrorCode ierr; 1641 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1642 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1643 MatScalar *a; 1644 1645 PetscFunctionBegin; 1646 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1647 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1648 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1649 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1650 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1651 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1652 } else { 1653 B = *matout; 1654 } 1655 1656 /* copy over the A part */ 1657 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1658 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1659 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1660 1661 for (i=0; i<mbs; i++) { 1662 rvals[0] = bs*(baij->rstartbs + i); 1663 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1664 for (j=ai[i]; j<ai[i+1]; j++) { 1665 col = (baij->cstartbs+aj[j])*bs; 1666 for (k=0; k<bs; k++) { 1667 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1668 col++; a += bs; 1669 } 1670 } 1671 } 1672 /* copy over the B part */ 1673 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1674 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1675 for (i=0; i<mbs; i++) { 1676 rvals[0] = bs*(baij->rstartbs + i); 1677 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1678 for (j=ai[i]; j<ai[i+1]; j++) { 1679 col = baij->garray[aj[j]]*bs; 1680 for (k=0; k<bs; k++) { 1681 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1682 col++; a += bs; 1683 } 1684 } 1685 } 1686 ierr = PetscFree(rvals);CHKERRQ(ierr); 1687 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 1690 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1691 *matout = B; 1692 } else { 1693 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1700 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1701 { 1702 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1703 Mat a = baij->A,b = baij->B; 1704 PetscErrorCode ierr; 1705 PetscInt s1,s2,s3; 1706 1707 PetscFunctionBegin; 1708 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1709 if (rr) { 1710 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1711 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1712 /* Overlap communication with computation. 
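The scatter of rr into baij->lvec is begun here and only completed after the left scaling of the off-diagonal block and the scaling of the diagonal block below, so the communication overlaps that local work.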
*/ 1713 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1714 } 1715 if (ll) { 1716 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1717 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1718 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1719 } 1720 /* scale the diagonal block */ 1721 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1722 1723 if (rr) { 1724 /* Do a scatter end and then right scale the off-diagonal block */ 1725 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1726 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1727 } 1728 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1735 { 1736 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1737 PetscErrorCode ierr; 1738 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1739 PetscInt i,*owners = A->rmap->range; 1740 PetscInt *nprocs,j,idx,nsends,row; 1741 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1742 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1743 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1744 MPI_Comm comm = ((PetscObject)A)->comm; 1745 MPI_Request *send_waits,*recv_waits; 1746 MPI_Status recv_status,*send_status; 1747 const PetscScalar *xx; 1748 PetscScalar *bb; 1749 #if defined(PETSC_DEBUG) 1750 PetscBool found = PETSC_FALSE; 1751 #endif 1752 1753 PetscFunctionBegin; 1754 /* first count number of contributors to each processor */ 1755 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1756 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1757 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1758 j = 0; 1759 for (i=0; i<N; i++) { 1760 if (lastidx > (idx = rows[i])) j = 0; 1761 lastidx = idx; 1762 for (; j<size; j++) { 1763 if (idx >= owners[j] && idx < owners[j+1]) { 1764 nprocs[2*j]++; 1765 nprocs[2*j+1] = 1; 1766 owner[i] = j; 1767 #if defined(PETSC_DEBUG) 1768 found = PETSC_TRUE; 1769 #endif 1770 break; 1771 } 1772 } 1773 #if defined(PETSC_DEBUG) 1774 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1775 found = PETSC_FALSE; 1776 #endif 1777 } 1778 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1779 1780 if (A->nooffproczerorows) { 1781 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1782 nrecvs = nsends; 1783 nmax = N; 1784 } else { 1785 /* inform other processors of number of messages and max length*/ 1786 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1787 } 1788 1789 /* post receives: */ 1790 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1791 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1792 for (i=0; i<nrecvs; i++) { 1793 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1794 } 1795 1796 /* do sends: 1797 1) starts[i] gives the starting index in svalues for stuff going to 1798 the ith processor 1799 */ 1800 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1801 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1802 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1803 starts[0] = 0; 1804 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1805 for (i=0; i<N; i++) { 1806 svalues[starts[owner[i]]++] = rows[i]; 1807 } 1808 1809 starts[0] = 0; 1810 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1811 count = 0; 1812 for (i=0; i<size; i++) { 1813 if (nprocs[2*i+1]) { 1814 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1815 } 1816 } 1817 ierr = PetscFree(starts);CHKERRQ(ierr); 1818 1819 base = owners[rank]; 1820 1821 /* wait on receives */ 1822 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1823 count = nrecvs; 1824 slen = 0; 1825 while (count) { 1826 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1827 /* unpack receives into our local space */ 1828 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1829 source[imdex] = recv_status.MPI_SOURCE; 1830 lens[imdex] = n; 1831 slen += n; 1832 count--; 1833 } 1834 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1835 1836 /* move the data into the send scatter */ 1837 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1838 count = 0; 1839 for (i=0; i<nrecvs; i++) { 1840 values = rvalues + i*nmax; 1841 for (j=0; j<lens[i]; j++) { 1842 lrows[count++] = values[j] - base; 1843 } 1844 } 1845 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1846 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1847 ierr = PetscFree(owner);CHKERRQ(ierr); 1848 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1849 1850 /* fix right hand side if needed */ 1851 if (x && b) { 1852 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1853 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1854 for (i=0; i<slen; i++) { 1855 bb[lrows[i]] = diag*xx[lrows[i]]; 1856 } 1857 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1859 } 1860 1861 /* actually zap the local rows */ 1862 /* 1863 Zero the required rows. If the "diagonal block" of the matrix 1864 is square and the user wishes to set the diagonal we use separate 1865 code so that MatSetValues() is not called for each diagonal allocating 1866 new memory, thus calling lots of mallocs and slowing things down. 
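Three cases are handled below: diag nonzero with a square diagonal block (pass diag straight to MatZeroRows_SeqBAIJ), diag nonzero with a rectangular diagonal block (zero the rows, then insert the diagonal entries with MatSetValues() and reassemble), and diag zero (simply zero the rows).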
1867 1868 */ 1869 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1870 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1871 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1872 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1873 } else if (diag != 0.0) { 1874 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1875 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1877 for (i=0; i<slen; i++) { 1878 row = lrows[i] + rstart_bs; 1879 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1880 } 1881 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1883 } else { 1884 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1885 } 1886 1887 ierr = PetscFree(lrows);CHKERRQ(ierr); 1888 1889 /* wait on sends */ 1890 if (nsends) { 1891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1893 ierr = PetscFree(send_status);CHKERRQ(ierr); 1894 } 1895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1896 ierr = PetscFree(svalues);CHKERRQ(ierr); 1897 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1903 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1904 { 1905 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatEqual_MPIBAIJ" 1917 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1918 { 1919 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1920 Mat a,b,c,d; 1921 PetscBool flg; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 a = matA->A; b = matA->B; 1926 c = matB->A; d = matB->B; 1927 1928 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1929 if (flg) { 1930 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1931 } 1932 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatCopy_MPIBAIJ" 1938 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1939 { 1940 PetscErrorCode ierr; 1941 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1942 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1943 1944 PetscFunctionBegin; 1945 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
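If so, or if the nonzero patterns differ, MatCopy_Basic() is used; otherwise the diagonal (A) and off-diagonal (B) blocks are copied directly.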
*/ 1946 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1947 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1948 } else { 1949 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1950 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1957 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1958 { 1959 PetscErrorCode ierr; 1960 1961 PetscFunctionBegin; 1962 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1968 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1969 { 1970 PetscErrorCode ierr; 1971 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1972 PetscBLASInt bnz,one=1; 1973 Mat_SeqBAIJ *x,*y; 1974 1975 PetscFunctionBegin; 1976 if (str == SAME_NONZERO_PATTERN) { 1977 PetscScalar alpha = a; 1978 x = (Mat_SeqBAIJ *)xx->A->data; 1979 y = (Mat_SeqBAIJ *)yy->A->data; 1980 bnz = PetscBLASIntCast(x->nz); 1981 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1982 x = (Mat_SeqBAIJ *)xx->B->data; 1983 y = (Mat_SeqBAIJ *)yy->B->data; 1984 bnz = PetscBLASIntCast(x->nz); 1985 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1986 } else { 1987 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1994 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1995 { 1996 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1997 PetscInt rbs,cbs; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2002 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2004 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2005 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2006 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2013 { 2014 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2015 PetscErrorCode ierr; 2016 2017 PetscFunctionBegin; 2018 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2019 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2026 { 2027 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2032 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 #undef __FUNCT__ 2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2039 { 2040 PetscErrorCode ierr; 2041 IS iscol_local; 2042 PetscInt csize; 2043 2044 PetscFunctionBegin; 2045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2046 if (call == MAT_REUSE_MATRIX) { 2047 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2048 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2049 } else { 2050 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2051 } 2052 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2053 if (call == MAT_INITIAL_MATRIX) { 2054 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2055 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2056 } 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2062 /* 2063 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2064 in local and then by concatenating the local matrices the end result. 2065 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2066 */ 2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2068 { 2069 PetscErrorCode ierr; 2070 PetscMPIInt rank,size; 2071 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2072 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2073 Mat *local,M,Mreuse; 2074 MatScalar *vwork,*aa; 2075 MPI_Comm comm = ((PetscObject)mat)->comm; 2076 Mat_SeqBAIJ *aij; 2077 2078 2079 PetscFunctionBegin; 2080 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2082 2083 if (call == MAT_REUSE_MATRIX) { 2084 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2085 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2086 local = &Mreuse; 2087 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2088 } else { 2089 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2090 Mreuse = *local; 2091 ierr = PetscFree(local);CHKERRQ(ierr); 2092 } 2093 2094 /* 2095 m - number of local rows 2096 n - number of columns (same on all processors) 2097 rstart - first row in new global matrix generated 2098 */ 2099 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2100 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2101 m = m/bs; 2102 n = n/bs; 2103 2104 if (call == MAT_INITIAL_MATRIX) { 2105 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2106 ii = aij->i; 2107 jj = aij->j; 2108 2109 /* 2110 Determine the number of non-zeros in the diagonal and off-diagonal 2111 portions of the matrix in order to do correct preallocation 2112 */ 2113 2114 /* first get start and end of "diagonal" columns */ 2115 if (csize == PETSC_DECIDE) { 2116 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2117 if (mglobal == n*bs) { /* square matrix */ 2118 nlocal = m; 2119 } else { 2120 nlocal = n/size + ((n % size) > rank); 2121 } 2122 } else { 2123 nlocal = csize/bs; 2124 } 2125 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2126 rstart = rend - nlocal; 2127 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2128 2129 /* next, compute all the lengths */ 2130 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2131 olens = dlens + m; 2132 for (i=0; i<m; i++) { 2133 jend = ii[i+1] - ii[i]; 2134 olen = 0; 2135 dlen = 0; 2136 for (j=0; j<jend; j++) { 2137 if (*jj < rstart || *jj >= rend) olen++; 2138 else dlen++; 2139 jj++; 2140 } 2141 olens[i] = olen; 2142 dlens[i] = dlen; 2143 } 2144 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2145 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2146 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2147 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2148 ierr = PetscFree(dlens);CHKERRQ(ierr); 2149 } else { 2150 PetscInt ml,nl; 2151 2152 M = *newmat; 2153 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2154 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2155 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2156 /* 2157 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2158 rather than the slower MatSetValues(). 2159 */ 2160 M->was_assembled = PETSC_TRUE; 2161 M->assembled = PETSC_FALSE; 2162 } 2163 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2164 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2165 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2166 ii = aij->i; 2167 jj = aij->j; 2168 aa = aij->a; 2169 for (i=0; i<m; i++) { 2170 row = rstart/bs + i; 2171 nz = ii[i+1] - ii[i]; 2172 cwork = jj; jj += nz; 2173 vwork = aa; aa += nz; 2174 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2175 } 2176 2177 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2179 *newmat = M; 2180 2181 /* save submatrix used in processor for next request */ 2182 if (call == MAT_INITIAL_MATRIX) { 2183 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2184 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2185 } 2186 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatPermute_MPIBAIJ" 2192 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2193 { 2194 MPI_Comm comm,pcomm; 2195 PetscInt first,local_size,nrows; 2196 const PetscInt *rows; 2197 PetscMPIInt size; 2198 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2203 /* make a collective version of 'rowp' */ 2204 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2205 if (pcomm==comm) { 2206 crowp = rowp; 2207 } else { 2208 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2209 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2210 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2211 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2212 } 2213 /* collect the global row permutation and invert it */ 2214 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2215 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2216 if (pcomm!=comm) { 2217 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2218 } 2219 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2220 /* get the local target indices */ 2221 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2223 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2224 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2225 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2226 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2227 /* the column permutation is so much easier; 2228 make a local version of 'colp' and invert it */ 2229 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2230 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2231 if (size==1) { 2232 
lcolp = colp; 2233 } else { 2234 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2235 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2236 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2237 } 2238 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2239 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2240 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2241 if (size>1) { 2242 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2243 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2244 } 2245 /* now we just get the submatrix */ 2246 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2247 /* clean up */ 2248 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2249 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2255 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2256 { 2257 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2258 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2259 2260 PetscFunctionBegin; 2261 if (nghosts) { *nghosts = B->nbs;} 2262 if (ghosts) {*ghosts = baij->garray;} 2263 PetscFunctionReturn(0); 2264 } 2265 2266 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2270 /* 2271 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2272 */ 2273 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2274 { 2275 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2276 PetscErrorCode ierr; 2277 PetscMPIInt size,*ncolsonproc,*disp,nn; 2278 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2279 const PetscInt *is; 2280 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2281 PetscInt *rowhit,M,cstart,cend,colb; 2282 PetscInt *columnsforrow,l; 2283 IS *isa; 2284 PetscBool done,flg; 2285 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2286 PetscInt *ltog = (map ?
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2287 2288 PetscFunctionBegin; 2289 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2290 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2291 2292 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2293 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2294 M = mat->rmap->n/bs; 2295 cstart = mat->cmap->rstart/bs; 2296 cend = mat->cmap->rend/bs; 2297 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2298 c->N = mat->cmap->N/bs; 2299 c->m = mat->rmap->n/bs; 2300 c->rstart = mat->rmap->rstart/bs; 2301 2302 c->ncolors = nis; 2303 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2307 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2308 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2309 2310 /* Allow access to data structures of local part of matrix */ 2311 if (!baij->colmap) { 2312 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2313 } 2314 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2315 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2316 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2318 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2319 2320 for (i=0; i<nis; i++) { 2321 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2322 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2323 c->ncolumns[i] = n; 2324 if (n) { 2325 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2326 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2328 } else { 2329 c->columns[i] = 0; 2330 } 2331 2332 if (ctype == IS_COLORING_GLOBAL){ 2333 /* Determine the total (parallel) number of columns of this color */ 2334 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2335 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2336 2337 nn = PetscMPIIntCast(n); 2338 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2339 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2340 if (!nctot) { 2341 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2342 } 2343 2344 disp[0] = 0; 2345 for (j=1; j<size; j++) { 2346 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2347 } 2348 2349 /* Get complete list of columns for color on each processor */ 2350 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2351 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2352 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2353 } else if (ctype == IS_COLORING_GHOSTED){ 2354 /* Determine local number of columns of this color on this process, including ghost points */ 2355 nctot = n; 2356 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2357 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2358 } else { 2359 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2360 } 2361 2362 /* 2363 Mark all rows affect by these columns 2364 */ 2365 /* Temporary option to allow for debugging/testing */ 2366 flg = PETSC_FALSE; 2367 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2368 if (!flg) {/*-----------------------------------------------------------------------------*/ 2369 /* crude, fast version */ 2370 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2371 /* loop over columns*/ 2372 for (j=0; j<nctot; j++) { 2373 if (ctype == IS_COLORING_GHOSTED) { 2374 col = ltog[cols[j]]; 2375 } else { 2376 col = cols[j]; 2377 } 2378 if (col >= cstart && col < cend) { 2379 /* column is in diagonal block of matrix */ 2380 rows = A_cj + A_ci[col-cstart]; 2381 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2382 } else { 2383 #if defined (PETSC_USE_CTABLE) 2384 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2385 colb --; 2386 #else 2387 colb = baij->colmap[col] - 1; 2388 #endif 2389 if (colb == -1) { 2390 m = 0; 2391 } else { 2392 colb = colb/bs; 2393 rows = B_cj + B_ci[colb]; 2394 m = B_ci[colb+1] - B_ci[colb]; 2395 } 2396 } 2397 /* loop over columns marking them in rowhit */ 2398 for (k=0; k<m; k++) { 2399 rowhit[*rows++] = col + 1; 2400 } 2401 } 2402 2403 /* count the number of hits */ 2404 nrows = 0; 2405 for (j=0; j<M; j++) { 2406 if (rowhit[j]) nrows++; 2407 } 2408 c->nrows[i] = nrows; 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2410 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2411 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2412 nrows = 0; 2413 for (j=0; j<M; j++) { 2414 if (rowhit[j]) { 2415 c->rows[i][nrows] = j; 2416 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2417 nrows++; 2418 } 2419 } 2420 } else {/*-------------------------------------------------------------------------------*/ 2421 /* slow version, using rowhit as a linked list */ 2422 PetscInt currentcol,fm,mfm; 2423 rowhit[M] = M; 2424 nrows = 0; 2425 /* loop over columns*/ 2426 for (j=0; j<nctot; j++) { 2427 if (ctype == IS_COLORING_GHOSTED) { 2428 col = ltog[cols[j]]; 2429 } else { 2430 col = cols[j]; 2431 } 2432 if (col >= cstart && col < cend) { 2433 /* column is in diagonal block of matrix */ 2434 rows = A_cj + A_ci[col-cstart]; 2435 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2436 } else { 2437 #if defined (PETSC_USE_CTABLE) 2438 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2439 colb --; 2440 #else 2441 colb = baij->colmap[col] - 1; 2442 #endif 2443 if (colb == -1) { 2444 m = 0; 2445 } else { 2446 colb = colb/bs; 2447 rows = B_cj + B_ci[colb]; 2448 m = B_ci[colb+1] - B_ci[colb]; 2449 } 2450 } 2451 2452 /* loop over columns marking them in rowhit */ 2453 fm = M; /* fm points to first entry in linked list */ 2454 for (k=0; k<m; k++) { 2455 currentcol = *rows++; 2456 /* is it already in the list? 
*/ 2457 do { 2458 mfm = fm; 2459 fm = rowhit[fm]; 2460 } while (fm < currentcol); 2461 /* not in list so add it */ 2462 if (fm != currentcol) { 2463 nrows++; 2464 columnsforrow[currentcol] = col; 2465 /* next three lines insert new entry into linked list */ 2466 rowhit[mfm] = currentcol; 2467 rowhit[currentcol] = fm; 2468 fm = currentcol; 2469 /* fm points to present position in list since we know the columns are sorted */ 2470 } else { 2471 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2472 } 2473 } 2474 } 2475 c->nrows[i] = nrows; 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2477 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2478 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2479 /* now store the linked list of rows into c->rows[i] */ 2480 nrows = 0; 2481 fm = rowhit[M]; 2482 do { 2483 c->rows[i][nrows] = fm; 2484 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2485 fm = rowhit[fm]; 2486 } while (fm < M); 2487 } /* ---------------------------------------------------------------------------------------*/ 2488 ierr = PetscFree(cols);CHKERRQ(ierr); 2489 } 2490 2491 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2492 /* 2493 vscale will contain the "diagonal" on processor scalings followed by the off processor 2494 */ 2495 if (ctype == IS_COLORING_GLOBAL) { 2496 PetscInt *garray; 2497 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2498 for (i=0; i<baij->B->cmap->n/bs; i++) { 2499 for (j=0; j<bs; j++) { 2500 garray[i*bs+j] = bs*baij->garray[i]+j; 2501 } 2502 } 2503 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2504 ierr = PetscFree(garray);CHKERRQ(ierr); 2505 CHKMEMQ; 2506 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2507 for (k=0; k<c->ncolors; k++) { 2508 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2509 for (l=0; l<c->nrows[k]; l++) { 2510 col = c->columnsforrow[k][l]; 2511 if (col >= cstart && col < cend) { 2512 /* column is in diagonal block of matrix */ 2513 colb = col - cstart; 2514 } else { 2515 /* column is in "off-processor" part */ 2516 #if defined (PETSC_USE_CTABLE) 2517 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2518 colb --; 2519 #else 2520 colb = baij->colmap[col] - 1; 2521 #endif 2522 colb = colb/bs; 2523 colb += cend - cstart; 2524 } 2525 c->vscaleforrow[k][l] = colb; 2526 } 2527 } 2528 } else if (ctype == IS_COLORING_GHOSTED) { 2529 /* Get gtol mapping */ 2530 PetscInt N = mat->cmap->N, *gtol; 2531 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2532 for (i=0; i<N; i++) gtol[i] = -1; 2533 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2534 2535 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2536 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2537 for (k=0; k<c->ncolors; k++) { 2538 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2539 for (l=0; l<c->nrows[k]; l++) { 2540 col = c->columnsforrow[k][l]; /* global column index */ 2541 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2542 } 2543 } 2544 ierr = PetscFree(gtol);CHKERRQ(ierr); 2545 } 2546 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2547 2548 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2549 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2550
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2551 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2552 CHKMEMQ; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2559 { 2560 Mat B; 2561 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2562 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2563 Mat_SeqAIJ *b; 2564 PetscErrorCode ierr; 2565 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2566 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2567 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2568 2569 PetscFunctionBegin; 2570 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2571 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2572 2573 /* ---------------------------------------------------------------- 2574 Tell every processor the number of nonzeros per row 2575 */ 2576 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2577 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2578 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2579 } 2580 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2581 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2582 displs = recvcounts + size; 2583 for (i=0; i<size; i++) { 2584 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2585 displs[i] = A->rmap->range[i]/bs; 2586 } 2587 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2588 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2589 #else 2590 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2591 #endif 2592 /* --------------------------------------------------------------- 2593 Create the sequential matrix of the same type as the local block diagonal 2594 */ 2595 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2596 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2597 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2598 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2599 b = (Mat_SeqAIJ *)B->data; 2600 2601 /*-------------------------------------------------------------------- 2602 Copy my part of matrix column indices over 2603 */ 2604 sendcount = ad->nz + bd->nz; 2605 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2606 a_jsendbuf = ad->j; 2607 b_jsendbuf = bd->j; 2608 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2609 cnt = 0; 2610 for (i=0; i<n; i++) { 2611 2612 /* put in lower diagonal portion */ 2613 m = bd->i[i+1] - bd->i[i]; 2614 while (m > 0) { 2615 /* is it above diagonal (in bd (compressed) numbering) */ 2616 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2617 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2618 m--; 2619 } 2620 2621 /* put in diagonal portion */ 2622 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2623 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2624 } 2625 2626 /* put in upper diagonal portion */ 2627 while (m-- > 0) { 2628 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2629 } 2630 } 2631 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2632 2633 /*-------------------------------------------------------------------- 2634 Gather all column indices to all processors 2635 */ 2636 for (i=0; i<size; i++) { 2637 recvcounts[i] = 0; 2638 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2639 recvcounts[i] += lens[j]; 2640 } 2641 } 2642 displs[0] = 0; 2643 for (i=1; i<size; i++) { 2644 displs[i] = displs[i-1] + recvcounts[i-1]; 2645 } 2646 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2647 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2648 #else 2649 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2650 #endif 2651 /*-------------------------------------------------------------------- 2652 Assemble the matrix into useable form (note numerical values not yet set) 2653 */ 2654 /* set the b->ilen (length of each row) values */ 2655 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2656 /* set the b->i indices */ 2657 b->i[0] = 0; 2658 for (i=1; i<=A->rmap->N/bs; i++) { 2659 b->i[i] = b->i[i-1] + lens[i-1]; 2660 } 2661 ierr = PetscFree(lens);CHKERRQ(ierr); 2662 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2665 2666 if (A->symmetric){ 2667 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2668 } else if (A->hermitian) { 2669 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2670 } else if (A->structurally_symmetric) { 2671 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2672 } 2673 *newmat = B; 2674 PetscFunctionReturn(0); 2675 } 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatSOR_MPIBAIJ" 2679 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2680 { 2681 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2682 PetscErrorCode ierr; 2683 Vec bb1 = 0; 2684 2685 PetscFunctionBegin; 2686 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2687 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2688 } 2689 2690 if (flag == SOR_APPLY_UPPER) { 2691 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2696 if (flag & SOR_ZERO_INITIAL_GUESS) { 2697 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2698 its--; 2699 } 2700 2701 while (its--) { 2702 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2704 2705 /* update rhs: bb1 = bb - B*x */ 2706 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2707 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2708 2709 /* local sweep */ 2710 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2711 } 2712 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2713 if (flag & SOR_ZERO_INITIAL_GUESS) { 2714 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2715 its--; 2716 } 2717 while (its--) { 2718 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2720 2721 /* update rhs: bb1 = bb - B*x */ 2722 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2723 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2724 2725 /* local sweep */ 2726 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2727 } 2728 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2729 if (flag & SOR_ZERO_INITIAL_GUESS) { 2730 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2731 its--; 2732 } 2733 while (its--) { 2734 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2736 2737 /* update rhs: bb1 = bb - B*x */ 2738 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2739 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2740 2741 /* local sweep */ 2742 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2743 } 2744 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2745 2746 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2751 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2754 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values) 2755 { 2756 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 2765 /* -------------------------------------------------------------------*/ 2766 static struct _MatOps MatOps_Values = { 2767 MatSetValues_MPIBAIJ, 2768 MatGetRow_MPIBAIJ, 2769 MatRestoreRow_MPIBAIJ, 2770 MatMult_MPIBAIJ, 2771 /* 4*/ MatMultAdd_MPIBAIJ, 2772 MatMultTranspose_MPIBAIJ, 2773 MatMultTransposeAdd_MPIBAIJ, 2774 0, 2775 0, 2776 0, 2777 /*10*/ 0, 2778 0, 2779 0, 2780 MatSOR_MPIBAIJ, 2781 MatTranspose_MPIBAIJ, 2782 /*15*/ MatGetInfo_MPIBAIJ, 2783 MatEqual_MPIBAIJ, 2784 MatGetDiagonal_MPIBAIJ, 2785 MatDiagonalScale_MPIBAIJ, 2786 MatNorm_MPIBAIJ, 2787 /*20*/ MatAssemblyBegin_MPIBAIJ, 2788 MatAssemblyEnd_MPIBAIJ, 2789 MatSetOption_MPIBAIJ, 2790 MatZeroEntries_MPIBAIJ, 2791 /*24*/ MatZeroRows_MPIBAIJ, 2792 0, 2793 0, 2794 0, 2795 0, 2796 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2797 0, 2798 0, 2799 0, 2800 0, 2801 /*34*/ MatDuplicate_MPIBAIJ, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*39*/ MatAXPY_MPIBAIJ, 2807 MatGetSubMatrices_MPIBAIJ, 2808 MatIncreaseOverlap_MPIBAIJ, 2809 MatGetValues_MPIBAIJ, 2810 MatCopy_MPIBAIJ, 2811 /*44*/ 0, 2812 MatScale_MPIBAIJ, 2813 0, 2814 0, 2815 0, 2816 /*49*/ MatSetBlockSize_MPIBAIJ, 2817 0, 2818 0, 2819 0, 2820 0, 2821 /*54*/ MatFDColoringCreate_MPIBAIJ, 2822 0, 2823 MatSetUnfactored_MPIBAIJ, 2824 MatPermute_MPIBAIJ, 2825 MatSetValuesBlocked_MPIBAIJ, 2826 /*59*/ MatGetSubMatrix_MPIBAIJ, 2827 MatDestroy_MPIBAIJ, 2828 MatView_MPIBAIJ, 2829 0, 2830 0, 2831 /*64*/ 0, 2832 0, 2833 0, 2834 0, 2835 0, 2836 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2837 0, 2838 0, 2839 0, 2840 0, 2841 /*74*/ 0, 2842 MatFDColoringApply_BAIJ, 2843 0, 2844 0, 2845 0, 2846 /*79*/ 0, 2847 0, 2848 0, 2849 0, 2850 MatLoad_MPIBAIJ, 2851 /*84*/ 0, 2852 0, 2853 0, 2854 0, 2855 0, 2856 /*89*/ 0, 2857 0, 2858 0, 2859 0, 2860 0, 2861 /*94*/ 0, 2862 0, 
2863 0, 2864 0, 2865 0, 2866 /*99*/ 0, 2867 0, 2868 0, 2869 0, 2870 0, 2871 /*104*/0, 2872 MatRealPart_MPIBAIJ, 2873 MatImaginaryPart_MPIBAIJ, 2874 0, 2875 0, 2876 /*109*/0, 2877 0, 2878 0, 2879 0, 2880 0, 2881 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2882 0, 2883 MatGetGhosts_MPIBAIJ, 2884 0, 2885 0, 2886 /*119*/0, 2887 0, 2888 0, 2889 0, 2890 0, 2891 /*124*/0, 2892 0, 2893 MatInvertBlockDiagonal_MPIBAIJ 2894 }; 2895 2896 EXTERN_C_BEGIN 2897 #undef __FUNCT__ 2898 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2899 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2900 { 2901 PetscFunctionBegin; 2902 *a = ((Mat_MPIBAIJ *)A->data)->A; 2903 PetscFunctionReturn(0); 2904 } 2905 EXTERN_C_END 2906 2907 EXTERN_C_BEGIN 2908 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2909 EXTERN_C_END 2910 2911 EXTERN_C_BEGIN 2912 #undef __FUNCT__ 2913 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2914 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2915 { 2916 PetscInt m,rstart,cstart,cend; 2917 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2918 const PetscInt *JJ=0; 2919 PetscScalar *values=0; 2920 PetscErrorCode ierr; 2921 2922 PetscFunctionBegin; 2923 2924 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2925 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2926 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2927 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2928 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2929 m = B->rmap->n/bs; 2930 rstart = B->rmap->rstart/bs; 2931 cstart = B->cmap->rstart/bs; 2932 cend = B->cmap->rend/bs; 2933 2934 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2935 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2936 for (i=0; i<m; i++) { 2937 nz = ii[i+1] - ii[i]; 2938 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2939 nz_max = PetscMax(nz_max,nz); 2940 JJ = jj + ii[i]; 2941 for (j=0; j<nz; j++) { 2942 if (*JJ >= cstart) break; 2943 JJ++; 2944 } 2945 d = 0; 2946 for (; j<nz; j++) { 2947 if (*JJ++ >= cend) break; 2948 d++; 2949 } 2950 d_nnz[i] = d; 2951 o_nnz[i] = nz - d; 2952 } 2953 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2954 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2955 2956 values = (PetscScalar*)V; 2957 if (!values) { 2958 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2959 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2960 } 2961 for (i=0; i<m; i++) { 2962 PetscInt row = i + rstart; 2963 PetscInt ncols = ii[i+1] - ii[i]; 2964 const PetscInt *icols = jj + ii[i]; 2965 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0); 2966 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2967 } 2968 2969 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 EXTERN_C_END 2976 2977 #undef __FUNCT__ 2978 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2979 /*@C 2980 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2981 (the default parallel PETSc format). 2982 2983 Collective on MPI_Comm 2984 2985 Input Parameters: 2986 + A - the matrix 2987 . bs - the block size 2988 . i - the indices into j for the start of each local row (starts with zero) 2989 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2990 - v - optional values in the matrix 2991 2992 Level: developer 2993 2994 .keywords: matrix, aij, compressed row, sparse, parallel 2995 2996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2997 @*/ 2998 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2999 { 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3004 PetscValidType(B,1); 3005 PetscValidLogicalCollectiveInt(B,bs,2); 3006 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 EXTERN_C_BEGIN 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 3013 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 3014 { 3015 Mat_MPIBAIJ *b; 3016 PetscErrorCode ierr; 3017 PetscInt i, newbs = PetscAbs(bs); 3018 3019 PetscFunctionBegin; 3020 if (bs < 0) { 3021 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3022 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3023 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3024 bs = PetscAbs(bs); 3025 } 3026 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3027 bs = newbs; 3028 3029 3030 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3031 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3032 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3033 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3034 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3035 3036 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3037 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3038 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3039 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3040 3041 if (d_nnz) { 3042 for (i=0; i<B->rmap->n/bs; i++) { 3043 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz 
cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3044 } 3045 } 3046 if (o_nnz) { 3047 for (i=0; i<B->rmap->n/bs; i++) { 3048 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3049 } 3050 } 3051 3052 b = (Mat_MPIBAIJ*)B->data; 3053 b->bs2 = bs*bs; 3054 b->mbs = B->rmap->n/bs; 3055 b->nbs = B->cmap->n/bs; 3056 b->Mbs = B->rmap->N/bs; 3057 b->Nbs = B->cmap->N/bs; 3058 3059 for (i=0; i<=b->size; i++) { 3060 b->rangebs[i] = B->rmap->range[i]/bs; 3061 } 3062 b->rstartbs = B->rmap->rstart/bs; 3063 b->rendbs = B->rmap->rend/bs; 3064 b->cstartbs = B->cmap->rstart/bs; 3065 b->cendbs = B->cmap->rend/bs; 3066 3067 if (!B->preallocated) { 3068 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3069 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3070 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3071 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3072 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3073 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3074 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3075 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3076 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3077 } 3078 3079 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3080 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3081 B->preallocated = PETSC_TRUE; 3082 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 EXTERN_C_END 3086 3087 EXTERN_C_BEGIN 3088 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3089 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3090 EXTERN_C_END 3091 3092 3093 EXTERN_C_BEGIN 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3096 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3097 { 3098 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3099 PetscErrorCode ierr; 3100 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3101 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3102 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3103 3104 PetscFunctionBegin; 3105 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3106 ii[0] = 0; 3107 CHKMEMQ; 3108 for (i=0; i<M; i++) { 3109 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3110 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3111 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3112 /* remove one from count of matrix has diagonal */ 3113 for (j=id[i]; j<id[i+1]; j++) { 3114 if (jd[j] == i) {ii[i+1]--;break;} 3115 } 3116 CHKMEMQ; 3117 } 3118 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3119 cnt = 0; 3120 for (i=0; i<M; i++) { 3121 for (j=io[i]; j<io[i+1]; j++) { 3122 if (garray[jo[j]] > rstart) break; 3123 jj[cnt++] = garray[jo[j]]; 3124 CHKMEMQ; 3125 } 3126 for (k=id[i]; k<id[i+1]; k++) { 3127 if (jd[k] != i) { 3128 jj[cnt++] = rstart + jd[k]; 3129 CHKMEMQ; 3130 } 3131 } 3132 for (;j<io[i+1]; j++) { 3133 jj[cnt++] = garray[jo[j]]; 3134 CHKMEMQ; 3135 } 3136 } 3137 ierr = 
MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3138 PetscFunctionReturn(0); 3139 } 3140 EXTERN_C_END 3141 3142 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3143 EXTERN_C_BEGIN 3144 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3145 EXTERN_C_END 3146 3147 EXTERN_C_BEGIN 3148 #undef __FUNCT__ 3149 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3150 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3151 { 3152 PetscErrorCode ierr; 3153 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3154 Mat B; 3155 Mat_MPIAIJ *b; 3156 3157 PetscFunctionBegin; 3158 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3159 3160 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3161 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3162 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3163 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 3164 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3165 b = (Mat_MPIAIJ*) B->data; 3166 3167 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3168 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3169 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3170 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3171 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3172 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3173 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3174 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3175 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3176 if (reuse == MAT_REUSE_MATRIX) { 3177 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3178 } else { 3179 *newmat = B; 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 EXTERN_C_END 3184 3185 EXTERN_C_BEGIN 3186 #if defined(PETSC_HAVE_MUMPS) 3187 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3188 #endif 3189 EXTERN_C_END 3190 3191 /*MC 3192 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3193 3194 Options Database Keys: 3195 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3196 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 3197 - -mat_use_hash_table <fact> 3198 3199 Level: beginner 3200 3201 .seealso: MatCreateMPIBAIJ 3202 M*/ 3203 3204 EXTERN_C_BEGIN 3205 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3206 EXTERN_C_END 3207 3208 EXTERN_C_BEGIN 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatCreate_MPIBAIJ" 3211 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3212 { 3213 Mat_MPIBAIJ *b; 3214 PetscErrorCode ierr; 3215 PetscBool flg; 3216 3217 PetscFunctionBegin; 3218 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3219 B->data = (void*)b; 3220 3221 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3222 B->assembled = PETSC_FALSE; 3223 3224 B->insertmode = NOT_SET_VALUES; 3225 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3226 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3227 3228 /* build local table of row and column ownerships */ 3229 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3230 3231 /* build cache for off array entries formed */ 3232 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3233 b->donotstash = PETSC_FALSE; 3234 b->colmap = PETSC_NULL; 3235 b->garray = PETSC_NULL; 3236 b->roworiented = PETSC_TRUE; 3237 3238 /* stuff used in block assembly */ 3239 b->barray = 0; 3240 3241 /* stuff used for matrix vector multiply */ 3242 b->lvec = 0; 3243 b->Mvctx = 0; 3244 3245 /* stuff for MatGetRow() */ 3246 b->rowindices = 0; 3247 b->rowvalues = 0; 3248 b->getrowactive = PETSC_FALSE; 3249 3250 /* hash table stuff */ 3251 b->ht = 0; 3252 b->hd = 0; 3253 b->ht_size = 0; 3254 b->ht_flag = PETSC_FALSE; 3255 b->ht_fact = 0; 3256 b->ht_total_ct = 0; 3257 b->ht_insert_ct = 0; 3258 3259 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3260 b->ijonly = PETSC_FALSE; 3261 3262 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3263 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3264 if (flg) { 3265 PetscReal fact = 1.39; 3266 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3267 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3268 if (fact <= 1.0) fact = 1.39; 3269 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3270 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3271 } 3272 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3273 3274 #if defined(PETSC_HAVE_MUMPS) 3275 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3276 #endif 3277 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3278 "MatConvert_MPIBAIJ_MPIAdj", 3279 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3280 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3281 "MatConvert_MPIBAIJ_MPIAIJ", 3282 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3284 "MatConvert_MPIBAIJ_MPISBAIJ", 3285 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3287 "MatStoreValues_MPIBAIJ", 3288 
MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3290                                      "MatRetrieveValues_MPIBAIJ",
3291                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3293                                      "MatGetDiagonalBlock_MPIBAIJ",
3294                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3296                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3297                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3299                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3300                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3302                                      "MatDiagonalScaleLocal_MPIBAIJ",
3303                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3305                                      "MatSetHashTableFactor_MPIBAIJ",
3306                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3308                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3309                                      MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3310   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 EXTERN_C_END
3314 
3315 /*MC
3316    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3317 
3318    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3319    and MATMPIBAIJ otherwise.
3320 
3321    Options Database Keys:
3322 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3323 
3324   Level: beginner
3325 
3326 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3327 M*/
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3331 /*@C
3332    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3333    (block compressed row).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on Mat
3339 
3340    Input Parameters:
3341 +  A - the matrix
3342 .  bs    - size of block
3343 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3344            submatrix (same for all local rows)
3345 .  d_nnz - array containing the number of block nonzeros in the various block rows
3346            in the diagonal portion of the local submatrix (possibly different for each block
3347            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3348 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3349            submatrix (same for all local rows).
3350 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3351            off-diagonal portion of the local submatrix (possibly different for
3352            each block row) or PETSC_NULL.
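
   Example usage (a minimal sketch; the communicator, the local sizes m and n, the block
   size of 2, and the guesses of 3 diagonal and 1 off-diagonal block nonzeros per block
   row are illustrative values chosen by the caller):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve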
3353 
3354    If the *_nnz parameter is given then the *_nz parameter is ignored
3355 
3356    Options Database Keys:
3357 +   -mat_block_size - size of the blocks to use
3358 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; fact is used to compute the size of the table
3359 
3360    Notes:
3361    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3362    then it must be used on all processors that share the object for that argument.
3363 
3364    Storage Information:
3365    For a square global matrix we define each processor's diagonal portion
3366    to be its local rows and the corresponding columns (a square submatrix);
3367    each processor's off-diagonal portion encompasses the remainder of the
3368    local matrix (a rectangular submatrix).
3369 
3370    The user can specify preallocated storage for the diagonal part of
3371    the local submatrix with either d_nz or d_nnz (not both).  Set
3372    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3373    memory allocation.  Likewise, specify preallocated storage for the
3374    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3375 
3376    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3377    the figure below we depict these three local rows and all columns (0-11).
3378 
3379 .vb
3380            0 1 2 3 4 5 6 7 8 9 10 11
3381           -------------------
3382    row 3  | o o o d d d o o o o o o
3383    row 4  | o o o d d d o o o o o o
3384    row 5  | o o o d d d o o o o o o
3385           -------------------
3386 .ve
3387 
3388    Thus, any entries in the d locations are stored in the d (diagonal)
3389    submatrix, and any entries in the o locations are stored in the
3390    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3391    stored simply in the MATSEQBAIJ format for compressed row storage.
3392 
3393    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3394    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3395    In general, for PDE problems in which most nonzeros are near the diagonal,
3396    one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
3397    or you will get TERRIBLE performance; see the users' manual chapter on
3398    matrices.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Level: intermediate
3406 
3407 .keywords: matrix, block, aij, compressed row, sparse, parallel
3408 
3409 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3410 @*/
3411 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3412 {
3413   PetscErrorCode ierr;
3414 
3415   PetscFunctionBegin;
3416   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3417   PetscValidType(B,1);
3418   PetscValidLogicalCollectiveInt(B,bs,2);
3419   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatCreateMPIBAIJ"
3425 /*@C
3426    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3427    (block compressed row).
For good matrix assembly performance
3428    the user should preallocate the matrix storage by setting the parameters
3429    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3430    performance can be increased by more than a factor of 50.
3431 
3432    Collective on MPI_Comm
3433 
3434    Input Parameters:
3435 +  comm - MPI communicator
3436 .  bs   - size of block
3437 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3438        This value should be the same as the local size used in creating the
3439        y vector for the matrix-vector product y = Ax.
3440 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3441        This value should be the same as the local size used in creating the
3442        x vector for the matrix-vector product y = Ax.
3443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3445 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3446            submatrix (same for all local rows)
3447 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3448            in the diagonal portion of the local submatrix (possibly different for each block
3449            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3450 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3451            submatrix (same for all local rows).
3452 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3453            off-diagonal portion of the local submatrix (possibly different for
3454            each block row) or PETSC_NULL.
3455 
3456    Output Parameter:
3457 .  A - the matrix
3458 
3459    Options Database Keys:
3460 +   -mat_block_size - size of the blocks to use
3461 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; fact is used to compute the size of the table
3462 
3463    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3464    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3465    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3466 
3467    Notes:
3468    If the *_nnz parameter is given then the *_nz parameter is ignored
3469 
3470    A nonzero block is any block that has 1 or more nonzeros in it
3471 
3472    The user MUST specify either the local or global matrix dimensions
3473    (possibly both).
3474 
3475    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3476    then it must be used on all processors that share the object for that argument.
3477 
3478    Storage Information:
3479    For a square global matrix we define each processor's diagonal portion
3480    to be its local rows and the corresponding columns (a square submatrix);
3481    each processor's off-diagonal portion encompasses the remainder of the
3482    local matrix (a rectangular submatrix).
3483 
3484    The user can specify preallocated storage for the diagonal part of
3485    the local submatrix with either d_nz or d_nnz (not both).  Set
3486    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3487    memory allocation.  Likewise, specify preallocated storage for the
3488    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3489 
3490    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3491    the figure below we depict these three local rows and all columns (0-11).
3492 3493 .vb 3494 0 1 2 3 4 5 6 7 8 9 10 11 3495 ------------------- 3496 row 3 | o o o d d d o o o o o o 3497 row 4 | o o o d d d o o o o o o 3498 row 5 | o o o d d d o o o o o o 3499 ------------------- 3500 .ve 3501 3502 Thus, any entries in the d locations are stored in the d (diagonal) 3503 submatrix, and any entries in the o locations are stored in the 3504 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3505 stored simply in the MATSEQBAIJ format for compressed row storage. 3506 3507 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3508 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3509 In general, for PDE problems in which most nonzeros are near the diagonal, 3510 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3511 or you will get TERRIBLE performance; see the users' manual chapter on 3512 matrices. 3513 3514 Level: intermediate 3515 3516 .keywords: matrix, block, aij, compressed row, sparse, parallel 3517 3518 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3519 @*/ 3520 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3521 { 3522 PetscErrorCode ierr; 3523 PetscMPIInt size; 3524 3525 PetscFunctionBegin; 3526 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3527 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3529 if (size > 1) { 3530 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3531 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3532 } else { 3533 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3534 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3535 } 3536 PetscFunctionReturn(0); 3537 } 3538 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3541 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3542 { 3543 Mat mat; 3544 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3545 PetscErrorCode ierr; 3546 PetscInt len=0; 3547 3548 PetscFunctionBegin; 3549 *newmat = 0; 3550 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3551 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3552 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3553 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3554 3555 mat->factortype = matin->factortype; 3556 mat->preallocated = PETSC_TRUE; 3557 mat->assembled = PETSC_TRUE; 3558 mat->insertmode = NOT_SET_VALUES; 3559 3560 a = (Mat_MPIBAIJ*)mat->data; 3561 mat->rmap->bs = matin->rmap->bs; 3562 a->bs2 = oldmat->bs2; 3563 a->mbs = oldmat->mbs; 3564 a->nbs = oldmat->nbs; 3565 a->Mbs = oldmat->Mbs; 3566 a->Nbs = oldmat->Nbs; 3567 3568 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3569 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3570 3571 a->size = oldmat->size; 3572 a->rank = oldmat->rank; 3573 a->donotstash = oldmat->donotstash; 3574 a->roworiented = oldmat->roworiented; 3575 a->rowindices = 0; 3576 a->rowvalues = 0; 3577 a->getrowactive = PETSC_FALSE; 3578 a->barray = 0; 3579 a->rstartbs = oldmat->rstartbs; 3580 a->rendbs = oldmat->rendbs; 3581 a->cstartbs = oldmat->cstartbs; 3582 a->cendbs = oldmat->cendbs; 3583 3584 
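  /* the hash table itself is not duplicated; only the use flag and the size factor are
     copied below, so the table is recreated for the new matrix when it is needed */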
/* hash table stuff */ 3585 a->ht = 0; 3586 a->hd = 0; 3587 a->ht_size = 0; 3588 a->ht_flag = oldmat->ht_flag; 3589 a->ht_fact = oldmat->ht_fact; 3590 a->ht_total_ct = 0; 3591 a->ht_insert_ct = 0; 3592 3593 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3594 if (oldmat->colmap) { 3595 #if defined (PETSC_USE_CTABLE) 3596 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3597 #else 3598 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3599 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3600 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3601 #endif 3602 } else a->colmap = 0; 3603 3604 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3605 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3606 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3607 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3608 } else a->garray = 0; 3609 3610 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3611 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3612 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3613 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3614 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3615 3616 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3617 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3618 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3619 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3620 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3621 *newmat = mat; 3622 3623 PetscFunctionReturn(0); 3624 } 3625 3626 #undef __FUNCT__ 3627 #define __FUNCT__ "MatLoad_MPIBAIJ" 3628 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3629 { 3630 PetscErrorCode ierr; 3631 int fd; 3632 PetscInt i,nz,j,rstart,rend; 3633 PetscScalar *vals,*buf; 3634 MPI_Comm comm = ((PetscObject)viewer)->comm; 3635 MPI_Status status; 3636 PetscMPIInt rank,size,maxnz; 3637 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3638 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3639 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3640 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3641 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3642 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3643 3644 PetscFunctionBegin; 3645 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3646 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3647 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3648 3649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3650 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3651 if (!rank) { 3652 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3653 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3654 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3655 } 3656 3657 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3658 3659 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3660 M = header[1]; N = header[2]; 3661 3662 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3663 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3664 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3665 3666 /* If global sizes are set, check if they are consistent with that given in the file */ 3667 if (sizesset) { 3668 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3669 } 3670 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3671 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3672 3673 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3674 3675 /* 3676 This code adds extra rows to make sure the number of rows is 3677 divisible by the blocksize 3678 */ 3679 Mbs = M/bs; 3680 extra_rows = bs - M + bs*Mbs; 3681 if (extra_rows == bs) extra_rows = 0; 3682 else Mbs++; 3683 if (extra_rows && !rank) { 3684 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3685 } 3686 3687 /* determine ownership of all rows */ 3688 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3689 mbs = Mbs/size + ((Mbs % size) > rank); 3690 m = mbs*bs; 3691 } else { /* User set */ 3692 m = newmat->rmap->n; 3693 mbs = m/bs; 3694 } 3695 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3696 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3697 3698 /* process 0 needs enough room for process with most rows */ 3699 if (!rank) { 3700 mmax = rowners[1]; 3701 for (i=2; i<size; i++) { 3702 mmax = PetscMax(mmax,rowners[i]); 3703 } 3704 mmax*=bs; 3705 } else mmax = m; 3706 3707 rowners[0] = 0; 3708 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3709 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3710 rstart = rowners[rank]; 3711 rend = rowners[rank+1]; 3712 3713 /* distribute row lengths to all processors */ 3714 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3715 if (!rank) { 3716 mend = m; 3717 if (size == 1) mend = mend - extra_rows; 3718 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3719 for (j=mend; j<m; j++) locrowlens[j] = 1; 3720 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3721 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3722 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3723 for (j=0; j<m; j++) { 3724 procsnz[0] += locrowlens[j]; 3725 } 3726 for (i=1; i<size; i++) { 3727 mend = browners[i+1] - browners[i]; 3728 if (i == size-1) mend = mend - extra_rows; 3729 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3730 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3731 /* calculate the number of nonzeros on each processor */ 3732 for (j=0; j<browners[i+1]-browners[i]; j++) { 3733 procsnz[i] += rowlengths[j]; 3734 } 3735 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3736 } 3737 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3738 } else { 3739 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3740 } 3741 3742 if (!rank) { 3743 /* determine max buffer needed and allocate it */ 3744 maxnz = 
procsnz[0]; 3745 for (i=1; i<size; i++) { 3746 maxnz = PetscMax(maxnz,procsnz[i]); 3747 } 3748 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3749 3750 /* read in my part of the matrix column indices */ 3751 nz = procsnz[0]; 3752 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3753 mycols = ibuf; 3754 if (size == 1) nz -= extra_rows; 3755 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3756 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3757 3758 /* read in every ones (except the last) and ship off */ 3759 for (i=1; i<size-1; i++) { 3760 nz = procsnz[i]; 3761 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3762 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3763 } 3764 /* read in the stuff for the last proc */ 3765 if (size != 1) { 3766 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3767 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3768 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3769 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3770 } 3771 ierr = PetscFree(cols);CHKERRQ(ierr); 3772 } else { 3773 /* determine buffer space needed for message */ 3774 nz = 0; 3775 for (i=0; i<m; i++) { 3776 nz += locrowlens[i]; 3777 } 3778 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3779 mycols = ibuf; 3780 /* receive message of column indices*/ 3781 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3782 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3783 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3784 } 3785 3786 /* loop over local rows, determining number of off diagonal entries */ 3787 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3788 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3789 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3790 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3791 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3792 rowcount = 0; nzcount = 0; 3793 for (i=0; i<mbs; i++) { 3794 dcount = 0; 3795 odcount = 0; 3796 for (j=0; j<bs; j++) { 3797 kmax = locrowlens[rowcount]; 3798 for (k=0; k<kmax; k++) { 3799 tmp = mycols[nzcount++]/bs; 3800 if (!mask[tmp]) { 3801 mask[tmp] = 1; 3802 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3803 else masked1[dcount++] = tmp; 3804 } 3805 } 3806 rowcount++; 3807 } 3808 3809 dlens[i] = dcount; 3810 odlens[i] = odcount; 3811 3812 /* zero out the mask elements we set */ 3813 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3814 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3815 } 3816 3817 3818 if (!sizesset) { 3819 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3820 } 3821 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3822 3823 if (!rank) { 3824 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3825 /* read in my part of the matrix numerical values */ 3826 nz = procsnz[0]; 3827 vals = buf; 3828 mycols = ibuf; 3829 if (size == 1) nz -= extra_rows; 3830 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3831 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3832 3833 /* insert into matrix */ 3834 jj = rstart*bs; 3835 for (i=0; i<m; i++) { 3836 ierr = 
MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3837 mycols += locrowlens[i]; 3838 vals += locrowlens[i]; 3839 jj++; 3840 } 3841 /* read in other processors (except the last one) and ship out */ 3842 for (i=1; i<size-1; i++) { 3843 nz = procsnz[i]; 3844 vals = buf; 3845 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3846 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3847 } 3848 /* the last proc */ 3849 if (size != 1){ 3850 nz = procsnz[i] - extra_rows; 3851 vals = buf; 3852 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3853 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3854 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3855 } 3856 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3857 } else { 3858 /* receive numeric values */ 3859 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3860 3861 /* receive message of values*/ 3862 vals = buf; 3863 mycols = ibuf; 3864 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3865 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3866 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3867 3868 /* insert into matrix */ 3869 jj = rstart*bs; 3870 for (i=0; i<m; i++) { 3871 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3872 mycols += locrowlens[i]; 3873 vals += locrowlens[i]; 3874 jj++; 3875 } 3876 } 3877 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3878 ierr = PetscFree(buf);CHKERRQ(ierr); 3879 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3880 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3881 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3882 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3883 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 3886 PetscFunctionReturn(0); 3887 } 3888 3889 #undef __FUNCT__ 3890 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3891 /*@ 3892 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3893 3894 Input Parameters: 3895 . mat - the matrix 3896 . 
fact - factor
3897 
3898    Not Collective, each process can use a different factor
3899 
3900    Level: advanced
3901 
3902   Notes:
3903    This can also be set by the command line option: -mat_use_hash_table <fact>
3904 
3905 .keywords: matrix, hashtable, factor, HT
3906 
3907 .seealso: MatSetOption()
3908 @*/
3909 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3910 {
3911   PetscErrorCode ierr;
3912 
3913   PetscFunctionBegin;
3914   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 EXTERN_C_BEGIN
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3921 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3922 {
3923   Mat_MPIBAIJ *baij;
3924 
3925   PetscFunctionBegin;
3926   baij = (Mat_MPIBAIJ*)mat->data;
3927   baij->ht_fact = fact;
3928   PetscFunctionReturn(0);
3929 }
3930 EXTERN_C_END
3931 
3932 #undef __FUNCT__
3933 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3934 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3935 {
3936   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3937   PetscFunctionBegin;
3938   *Ad     = a->A;
3939   *Ao     = a->B;
3940   *colmap = a->garray;
3941   PetscFunctionReturn(0);
3942 }
3943 
3944 /*
3945     Special version for direct calls from Fortran (to eliminate two function call overheads)
3946 */
3947 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3948 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3949 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3950 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3951 #endif
3952 
3953 #undef __FUNCT__
3954 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3955 /*@C
3956   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3957 
3958   Collective on Mat
3959 
3960   Input Parameters:
3961 + mat - the matrix
3962 . min - number of input rows
3963 . im - input rows
3964 . nin - number of input columns
3965 . in - input columns
3966 . v - numerical values input
3967 - addvin - INSERT_VALUES or ADD_VALUES
3968 
3969   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
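
  From C there is no need for this routine; the standard call it replaces is shown below
  (a sketch only, written with the dereferenced values of the Fortran pointer arguments
  listed above, i.e. m = *min, n = *nin, addv = *addvin):
.vb
      ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
.ve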
3970 3971 Level: advanced 3972 3973 .seealso: MatSetValuesBlocked() 3974 @*/ 3975 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3976 { 3977 /* convert input arguments to C version */ 3978 Mat mat = *matin; 3979 PetscInt m = *min, n = *nin; 3980 InsertMode addv = *addvin; 3981 3982 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3983 const MatScalar *value; 3984 MatScalar *barray=baij->barray; 3985 PetscBool roworiented = baij->roworiented; 3986 PetscErrorCode ierr; 3987 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3988 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3989 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3990 3991 PetscFunctionBegin; 3992 /* tasks normally handled by MatSetValuesBlocked() */ 3993 if (mat->insertmode == NOT_SET_VALUES) { 3994 mat->insertmode = addv; 3995 } 3996 #if defined(PETSC_USE_DEBUG) 3997 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3998 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3999 #endif 4000 if (mat->assembled) { 4001 mat->was_assembled = PETSC_TRUE; 4002 mat->assembled = PETSC_FALSE; 4003 } 4004 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4005 4006 4007 if(!barray) { 4008 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 4009 baij->barray = barray; 4010 } 4011 4012 if (roworiented) { 4013 stepval = (n-1)*bs; 4014 } else { 4015 stepval = (m-1)*bs; 4016 } 4017 for (i=0; i<m; i++) { 4018 if (im[i] < 0) continue; 4019 #if defined(PETSC_USE_DEBUG) 4020 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4021 #endif 4022 if (im[i] >= rstart && im[i] < rend) { 4023 row = im[i] - rstart; 4024 for (j=0; j<n; j++) { 4025 /* If NumCol = 1 then a copy is not required */ 4026 if ((roworiented) && (n == 1)) { 4027 barray = (MatScalar*)v + i*bs2; 4028 } else if((!roworiented) && (m == 1)) { 4029 barray = (MatScalar*)v + j*bs2; 4030 } else { /* Here a copy is required */ 4031 if (roworiented) { 4032 value = v + i*(stepval+bs)*bs + j*bs; 4033 } else { 4034 value = v + j*(stepval+bs)*bs + i*bs; 4035 } 4036 for (ii=0; ii<bs; ii++,value+=stepval) { 4037 for (jj=0; jj<bs; jj++) { 4038 *barray++ = *value++; 4039 } 4040 } 4041 barray -=bs2; 4042 } 4043 4044 if (in[j] >= cstart && in[j] < cend){ 4045 col = in[j] - cstart; 4046 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4047 } 4048 else if (in[j] < 0) continue; 4049 #if defined(PETSC_USE_DEBUG) 4050 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4051 #endif 4052 else { 4053 if (mat->was_assembled) { 4054 if (!baij->colmap) { 4055 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4056 } 4057 4058 #if defined(PETSC_USE_DEBUG) 4059 #if defined (PETSC_USE_CTABLE) 4060 { PetscInt data; 4061 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4062 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4063 } 4064 #else 4065 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4066 #endif 4067 #endif 4068 #if defined (PETSC_USE_CTABLE) 4069 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4070 col = (col - 1)/bs; 4071 #else 
4072               col  = (baij->colmap[in[j]] - 1)/bs;
4073 #endif
4074               if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4075                 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4076                 col = in[j];
4077               }
4078             }
4079             else col = in[j];
4080             ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4081           }
4082         }
4083       } else {
4084         if (!baij->donotstash) {
4085           if (roworiented) {
4086             ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4087           } else {
4088             ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4089           }
4090         }
4091       }
4092     }
4093 
4094   /* task normally handled by MatSetValuesBlocked() */
4095   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 #undef __FUNCT__
4100 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4101 /*@
4102    MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4103    in standard CSR format.
4104 
4105    Collective on MPI_Comm
4106 
4107    Input Parameters:
4108 +  comm - MPI communicator
4109 .  bs - the block size, only a block size of 1 is supported
4110 .  m - number of local rows (Cannot be PETSC_DECIDE)
4111 .  n - This value should be the same as the local size used in creating the
4112        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4113        calculated if N is given) For square matrices n is almost always m.
4114 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4115 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4116 .  i - the indices into j for the start of each local row (starts with zero)
4117 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
4118 -  a - matrix values
4119 
4120    Output Parameter:
4121 .  mat - the matrix
4122 
4123    Level: intermediate
4124 
4125    Notes:
4126      The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4127      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4128      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4129 
4130      The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4131 
4132 .keywords: matrix, aij, compressed row, sparse, parallel
4133 
4134 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4135           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4136 @*/
4137 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4138 {
4139   PetscErrorCode ierr;
4140 
4141 
4142   PetscFunctionBegin;
4143   if (i[0]) {
4144     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4145   }
4146   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4147   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4148   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4149   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4150   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4151   PetscFunctionReturn(0);
4152 }
4153 
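
/*
   Example usage of MatCreateMPIBAIJWithArrays() (a minimal sketch; the CSR data below is
   illustrative only: block size 1, two local rows and three local columns on this process,
   with the global sizes left for PETSc to determine).  ii[] holds the offsets into jj[] for
   the start of each local row, jj[] the 0-based global column indices (sorted within each
   row), and aa[] the corresponding values:

      PetscInt    ii[] = {0,2,4};
      PetscInt    jj[] = {0,1,1,2};
      PetscScalar aa[] = {1.0,2.0,3.0,4.0};
      Mat         A;

      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/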