1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat); 7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 17 { 18 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19 PetscErrorCode ierr; 20 PetscInt i,*idxb = 0; 21 PetscScalar *va,*vb; 22 Vec vtmp; 23 24 PetscFunctionBegin; 25 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 26 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 27 if (idx) { 28 for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;} 29 } 30 31 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 32 if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);} 33 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 34 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 35 36 for (i=0; i<A->rmap->n; i++){ 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);} 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = 
PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 EXTERN_C_BEGIN 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 50 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 51 { 52 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 57 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 EXTERN_C_END 61 62 EXTERN_C_BEGIN 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 72 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 EXTERN_C_END 76 77 /* 78 Local utility routine that creates a mapping from the global column 79 number to the local number in the off-diagonal part of the local 80 storage of the matrix. This is done in a non scalable way since the 81 length of colmap equals the global matrix length. 
82 */ 83 #undef __FUNCT__ 84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private" 85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat) 86 { 87 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 88 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 89 PetscErrorCode ierr; 90 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 91 92 PetscFunctionBegin; 93 #if defined (PETSC_USE_CTABLE) 94 ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr); 95 for (i=0; i<nbs; i++){ 96 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 97 } 98 #else 99 ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr); 100 ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 101 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 103 #endif 104 PetscFunctionReturn(0); 105 } 106 107 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 108 { \ 109 \ 110 brow = row/bs; \ 111 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 112 rmax = aimax[brow]; nrow = ailen[brow]; \ 113 bcol = col/bs; \ 114 ridx = row % bs; cidx = col % bs; \ 115 low = 0; high = nrow; \ 116 while (high-low > 3) { \ 117 t = (low+high)/2; \ 118 if (rp[t] > bcol) high = t; \ 119 else low = t; \ 120 } \ 121 for (_i=low; _i<high; _i++) { \ 122 if (rp[_i] > bcol) break; \ 123 if (rp[_i] == bcol) { \ 124 bap = ap + bs2*_i + bs*cidx + ridx; \ 125 if (addv == ADD_VALUES) *bap += value; \ 126 else *bap = value; \ 127 goto a_noinsert; \ 128 } \ 129 } \ 130 if (a->nonew == 1) goto a_noinsert; \ 131 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 133 N = nrow++ - 1; \ 134 /* shift up all the later entries in this row */ \ 135 for (ii=N; ii>=_i; ii--) { \ 136 rp[ii+1] = rp[ii]; \ 137 ierr = 
PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 138 } \ 139 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 140 rp[_i] = bcol; \ 141 ap[bs2*_i + bs*cidx + ridx] = value; \ 142 a_noinsert:; \ 143 ailen[brow] = nrow; \ 144 } 145 146 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 147 { \ 148 brow = row/bs; \ 149 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 150 rmax = bimax[brow]; nrow = bilen[brow]; \ 151 bcol = col/bs; \ 152 ridx = row % bs; cidx = col % bs; \ 153 low = 0; high = nrow; \ 154 while (high-low > 3) { \ 155 t = (low+high)/2; \ 156 if (rp[t] > bcol) high = t; \ 157 else low = t; \ 158 } \ 159 for (_i=low; _i<high; _i++) { \ 160 if (rp[_i] > bcol) break; \ 161 if (rp[_i] == bcol) { \ 162 bap = ap + bs2*_i + bs*cidx + ridx; \ 163 if (addv == ADD_VALUES) *bap += value; \ 164 else *bap = value; \ 165 goto b_noinsert; \ 166 } \ 167 } \ 168 if (b->nonew == 1) goto b_noinsert; \ 169 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 170 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 171 CHKMEMQ;\ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 177 } \ 178 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 179 rp[_i] = bcol; \ 180 ap[bs2*_i + bs*cidx + ridx] = value; \ 181 b_noinsert:; \ 182 bilen[brow] = nrow; \ 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSetValues_MPIBAIJ" 187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 188 { 189 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 190 MatScalar value; 191 PetscBool roworiented = 
baij->roworiented; 192 PetscErrorCode ierr; 193 PetscInt i,j,row,col; 194 PetscInt rstart_orig=mat->rmap->rstart; 195 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 196 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 197 198 /* Some Variables required in the macro */ 199 Mat A = baij->A; 200 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 201 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 202 MatScalar *aa=a->a; 203 204 Mat B = baij->B; 205 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 206 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 207 MatScalar *ba=b->a; 208 209 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 210 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 211 MatScalar *ap,*bap; 212 213 PetscFunctionBegin; 214 if (v) PetscValidScalarPointer(v,6); 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig){ 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 227 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 228 } else if (in[j] < 0) continue; 229 #if defined(PETSC_USE_DEBUG) 230 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 231 #endif 232 else { 233 if (mat->was_assembled) { 234 if (!baij->colmap) { 235 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 236 } 237 #if defined (PETSC_USE_CTABLE) 238 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 239 col = col - 1; 240 #else 241 col = baij->colmap[in[j]/bs] - 1; 242 #endif 243 if (col < 0 && 
!((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 244 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 245 col = in[j]; 246 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 247 B = baij->B; 248 b = (Mat_SeqBAIJ*)(B)->data; 249 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 250 ba=b->a; 251 } else col += in[j]%bs; 252 } else col = in[j]; 253 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 254 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 260 if (!baij->donotstash) { 261 if (roworiented) { 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 263 } else { 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 277 const PetscScalar *value; 278 MatScalar *barray=baij->barray; 279 PetscBool roworiented = baij->roworiented; 280 PetscErrorCode ierr; 281 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 282 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 283 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 284 285 PetscFunctionBegin; 286 if(!barray) { 287 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 288 baij->barray = barray; 289 } 290 291 if (roworiented) { 292 stepval = (n-1)*bs; 293 } else { 294 stepval = (m-1)*bs; 295 } 296 for (i=0; i<m; i++) { 297 if (im[i] < 0) continue; 298 #if 
defined(PETSC_USE_DEBUG) 299 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 300 #endif 301 if (im[i] >= rstart && im[i] < rend) { 302 row = im[i] - rstart; 303 for (j=0; j<n; j++) { 304 /* If NumCol = 1 then a copy is not required */ 305 if ((roworiented) && (n == 1)) { 306 barray = (MatScalar*)v + i*bs2; 307 } else if((!roworiented) && (m == 1)) { 308 barray = (MatScalar*)v + j*bs2; 309 } else { /* Here a copy is required */ 310 if (roworiented) { 311 value = v + (i*(stepval+bs) + j)*bs; 312 } else { 313 value = v + (j*(stepval+bs) + i)*bs; 314 } 315 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 316 for (jj=0; jj<bs; jj++) { 317 barray[jj] = value[jj]; 318 } 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend){ 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } 328 else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined (PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 355 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 
col = in[j]; 357 } 358 } 359 else col = in[j]; 360 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 361 } 362 } 363 } else { 364 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 365 if (!baij->donotstash) { 366 if (roworiented) { 367 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 368 } else { 369 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } 371 } 372 } 373 } 374 PetscFunctionReturn(0); 375 } 376 377 #define HASH_KEY 0.6180339887 378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 384 { 385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 386 PetscBool roworiented = baij->roworiented; 387 PetscErrorCode ierr; 388 PetscInt i,j,row,col; 389 PetscInt rstart_orig=mat->rmap->rstart; 390 PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs; 391 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 392 PetscReal tmp; 393 MatScalar **HD = baij->hd,value; 394 #if defined(PETSC_USE_DEBUG) 395 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 396 #endif 397 398 PetscFunctionBegin; 399 if (v) PetscValidScalarPointer(v,6); 400 for (i=0; i<m; i++) { 401 #if defined(PETSC_USE_DEBUG) 402 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 403 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",im[i],mat->rmap->N-1); 404 #endif 405 row = im[i]; 406 if (row >= rstart_orig && row < rend_orig) { 407 for (j=0; j<n; j++) { 408 col = in[j]; 409 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 423 if (idx == h1) { 424 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 425 } 426 } 427 } 428 #else 429 if (HT[idx] != key) { 430 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 431 if (idx == size) { 432 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 433 if (idx == h1) { 434 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 435 } 436 } 437 } 438 #endif 439 /* A HASH table entry is found, so insert the values at the correct address */ 440 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 441 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 442 } 443 } else { 444 if (!baij->donotstash) { 445 if (roworiented) { 446 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 447 } else { 448 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 #if defined(PETSC_USE_DEBUG) 454 baij->ht_total_ct = total_ct; 455 baij->ht_insert_ct = insert_ct; 456 #endif 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 463 { 464 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ*)mat->data; 465 PetscBool roworiented = baij->roworiented; 466 PetscErrorCode ierr; 467 PetscInt i,j,ii,jj,row,col; 468 PetscInt rstart=baij->rstartbs; 469 PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 470 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 471 PetscReal tmp; 472 MatScalar **HD = baij->hd,*baij_a; 473 const PetscScalar *v_t,*value; 474 #if defined(PETSC_USE_DEBUG) 475 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 476 #endif 477 478 PetscFunctionBegin; 479 480 if (roworiented) { 481 stepval = (n-1)*bs; 482 } else { 483 stepval = (m-1)*bs; 484 } 485 for (i=0; i<m; i++) { 486 #if defined(PETSC_USE_DEBUG) 487 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 488 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 489 #endif 490 row = im[i]; 491 v_t = v + i*nbs2; 492 if (row >= rstart && row < rend) { 493 for (j=0; j<n; j++) { 494 col = in[j]; 495 496 /* Look up into the Hash Table */ 497 key = row*Nbs+col+1; 498 h1 = HASH(size,key,tmp); 499 500 idx = h1; 501 #if defined(PETSC_USE_DEBUG) 502 total_ct++; 503 insert_ct++; 504 if (HT[idx] != key) { 505 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 506 if (idx == size) { 507 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 508 if (idx == h1) { 509 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 510 } 511 } 512 } 513 #else 514 if (HT[idx] != key) { 515 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 516 if (idx == size) { 517 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 518 if (idx == h1) { 519 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 520 } 521 } 522 } 523 #endif 524 baij_a = HD[idx]; 525 if (roworiented) { 526 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 527 /* value = 
v + (i*(stepval+bs)+j)*bs; */ 528 value = v_t; 529 v_t += bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval) { 532 for (jj=ii; jj<bs2; jj+=bs) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval) { 538 for (jj=ii; jj<bs2; jj+=bs) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } else { 544 value = v + j*(stepval+bs)*bs + i*bs; 545 if (addv == ADD_VALUES) { 546 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 547 for (jj=0; jj<bs; jj++) { 548 baij_a[jj] += *value++; 549 } 550 } 551 } else { 552 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 553 for (jj=0; jj<bs; jj++) { 554 baij_a[jj] = *value++; 555 } 556 } 557 } 558 } 559 } 560 } else { 561 if (!baij->donotstash) { 562 if (roworiented) { 563 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 564 } else { 565 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 566 } 567 } 568 } 569 } 570 #if defined(PETSC_USE_DEBUG) 571 baij->ht_total_ct = total_ct; 572 baij->ht_insert_ct = insert_ct; 573 #endif 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIBAIJ" 579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 584 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 591 row = idxm[i] - bsrstart; 592 for 
(j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= bscstart && idxn[j] < bscend){ 596 col = idxn[j] - bscstart; 597 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!baij->colmap) { 600 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 604 data --; 605 #else 606 data = baij->colmap[idxn[j]/bs]-1; 607 #endif 608 if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 609 else { 610 col = data + idxn[j]%bs; 611 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 612 } 613 } 614 } 615 } else { 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatNorm_MPIBAIJ" 624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 625 { 626 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 627 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 628 PetscErrorCode ierr; 629 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 630 PetscReal sum = 0.0; 631 MatScalar *v; 632 633 PetscFunctionBegin; 634 if (baij->size == 1) { 635 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 636 } else { 637 if (type == NORM_FROBENIUS) { 638 v = amat->a; 639 nz = amat->nz*bs2; 640 for (i=0; i<nz; i++) { 641 #if defined(PETSC_USE_COMPLEX) 642 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 643 #else 644 sum += (*v)*(*v); v++; 645 #endif 646 } 647 v = bmat->a; 648 nz = bmat->nz*bs2; 649 for (i=0; i<nz; i++) { 650 #if defined(PETSC_USE_COMPLEX) 651 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 652 #else 653 sum += (*v)*(*v); v++; 654 #endif 655 } 656 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 657 *nrm = sqrt(*nrm); 658 } else if (type == NORM_1) { /* max column sum */ 659 PetscReal *tmp,*tmp2; 660 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 661 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 662 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 663 v = amat->a; jj = amat->j; 664 for (i=0; i<amat->nz; i++) { 665 for (j=0; j<bs; j++){ 666 col = bs*(cstart + *jj) + j; /* column index */ 667 for (row=0; row<bs; row++){ 668 tmp[col] += PetscAbsScalar(*v); v++; 669 } 670 } 671 jj++; 672 } 673 v = bmat->a; jj = bmat->j; 674 for (i=0; i<bmat->nz; i++) { 675 for (j=0; j<bs; j++){ 676 col = bs*garray[*jj] + j; 677 for (row=0; row<bs; row++){ 678 tmp[col] += PetscAbsScalar(*v); v++; 679 } 680 } 681 jj++; 682 } 683 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 684 *nrm = 0.0; 685 for (j=0; j<mat->cmap->N; j++) { 686 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 687 } 688 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 689 } else if (type == NORM_INFINITY) { /* max row sum */ 690 PetscReal *sums; 691 ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr); 692 sum = 0.0; 693 for (j=0; j<amat->mbs; j++) { 694 for (row=0; row<bs; row++) sums[row] = 0.0; 695 v = amat->a + bs2*amat->i[j]; 696 nz = amat->i[j+1]-amat->i[j]; 697 for (i=0; i<nz; i++) { 698 for (col=0; col<bs; col++){ 699 for (row=0; row<bs; row++){ 700 sums[row] += PetscAbsScalar(*v); v++; 701 } 702 } 703 } 704 v = bmat->a + bs2*bmat->i[j]; 705 nz = bmat->i[j+1]-bmat->i[j]; 706 for (i=0; i<nz; i++) { 707 for (col=0; col<bs; col++){ 708 for (row=0; row<bs; row++){ 709 sums[row] += PetscAbsScalar(*v); v++; 710 } 711 } 712 } 713 for (row=0; row<bs; row++){ 714 if (sums[row] > sum) sum = sums[row]; 
715 } 716 } 717 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 718 ierr = PetscFree(sums);CHKERRQ(ierr); 719 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 /* 725 Creates the hash table, and sets the table 726 This table is created only once. 727 If new entried need to be added to the matrix 728 then the hash table has to be destroyed and 729 recreated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 734 { 735 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 736 Mat A = baij->A,B=baij->B; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 738 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 739 PetscErrorCode ierr; 740 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 741 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 742 PetscInt *HT,key; 743 MatScalar **HD; 744 PetscReal tmp; 745 #if defined(PETSC_USE_INFO) 746 PetscInt ct=0,max=0; 747 #endif 748 749 PetscFunctionBegin; 750 if (baij->ht) PetscFunctionReturn(0); 751 752 baij->ht_size = (PetscInt)(factor*nz); 753 ht_size = baij->ht_size; 754 755 /* Allocate Memory for Hash Table */ 756 ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr); 757 ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr); 758 ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr); 759 HD = baij->hd; 760 HT = baij->ht; 761 762 /* Loop Over A */ 763 for (i=0; i<a->mbs; i++) { 764 for (j=ai[i]; j<ai[i+1]; j++) { 765 row = i+rstart; 766 col = aj[j]+cstart; 767 768 key = row*Nbs + col + 1; 769 h1 = HASH(ht_size,key,tmp); 770 for (k=0; k<ht_size; k++){ 771 if (!HT[(h1+k)%ht_size]) { 772 HT[(h1+k)%ht_size] = key; 773 HD[(h1+k)%ht_size] = a->a + j*bs2; 774 break; 775 
#if defined(PETSC_USE_INFO) 776 } else { 777 ct++; 778 #endif 779 } 780 } 781 #if defined(PETSC_USE_INFO) 782 if (k> max) max = k; 783 #endif 784 } 785 } 786 /* Loop Over B */ 787 for (i=0; i<b->mbs; i++) { 788 for (j=bi[i]; j<bi[i+1]; j++) { 789 row = i+rstart; 790 col = garray[bj[j]]; 791 key = row*Nbs + col + 1; 792 h1 = HASH(ht_size,key,tmp); 793 for (k=0; k<ht_size; k++){ 794 if (!HT[(h1+k)%ht_size]) { 795 HT[(h1+k)%ht_size] = key; 796 HD[(h1+k)%ht_size] = b->a + j*bs2; 797 break; 798 #if defined(PETSC_USE_INFO) 799 } else { 800 ct++; 801 #endif 802 } 803 } 804 #if defined(PETSC_USE_INFO) 805 if (k> max) max = k; 806 #endif 807 } 808 } 809 810 /* Print Summary */ 811 #if defined(PETSC_USE_INFO) 812 for (i=0,j=0; i<ht_size; i++) { 813 if (HT[i]) {j++;} 814 } 815 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 816 #endif 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 823 { 824 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 825 PetscErrorCode ierr; 826 PetscInt nstash,reallocs; 827 InsertMode addv; 828 829 PetscFunctionBegin; 830 if (baij->donotstash || mat->nooffprocentries) { 831 PetscFunctionReturn(0); 832 } 833 834 /* make sure all processors are either in INSERTMODE or ADDMODE */ 835 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 mat->insertmode = addv; /* in case this processor had no cache */ 838 839 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 840 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 841 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 842 ierr 
= PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 843 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 844 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 851 { 852 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 853 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data; 854 PetscErrorCode ierr; 855 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 856 PetscInt *row,*col; 857 PetscBool r1,r2,r3,other_disassembled; 858 MatScalar *val; 859 InsertMode addv = mat->insertmode; 860 PetscMPIInt n; 861 862 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 863 PetscFunctionBegin; 864 if (!baij->donotstash && !mat->nooffprocentries) { 865 while (1) { 866 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 867 if (!flg) break; 868 869 for (i=0; i<n;) { 870 /* Now identify the consecutive vals belonging to the same row */ 871 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 872 if (j < n) ncols = j-i; 873 else ncols = n-i; 874 /* Now assemble all these values with a single function call */ 875 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 876 i = j; 877 } 878 } 879 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 880 /* Now process the block-stash. 
Since the values are stashed column-oriented, 881 set the roworiented flag to column oriented, and after MatSetValues() 882 restore the original flags */ 883 r1 = baij->roworiented; 884 r2 = a->roworiented; 885 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 886 baij->roworiented = PETSC_FALSE; 887 a->roworiented = PETSC_FALSE; 888 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 889 while (1) { 890 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 891 if (!flg) break; 892 893 for (i=0; i<n;) { 894 /* Now identify the consecutive vals belonging to the same row */ 895 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 896 if (j < n) ncols = j-i; 897 else ncols = n-i; 898 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 903 baij->roworiented = r1; 904 a->roworiented = r2; 905 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 906 } 907 908 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 910 911 /* determine if any processor has disassembled, if so we must 912 also disassemble ourselfs, in order that we may reassemble. 
*/ 913 /* 914 if nonzero structure of submatrix B cannot change then we know that 915 no processor disassembled thus we can skip this stuff 916 */ 917 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 918 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 919 if (mat->was_assembled && !other_disassembled) { 920 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 921 } 922 } 923 924 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 925 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 926 } 927 ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 930 931 #if defined(PETSC_USE_INFO) 932 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 933 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 934 baij->ht_total_ct = 0; 935 baij->ht_insert_ct = 0; 936 } 937 #endif 938 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 939 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 940 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 941 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 942 } 943 944 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 945 baij->rowvalues = 0; 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 952 { 953 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 954 PetscErrorCode ierr; 955 PetscMPIInt size = baij->size,rank = baij->rank; 956 PetscInt bs = mat->rmap->bs; 957 PetscBool iascii,isdraw; 958 PetscViewer sviewer; 959 PetscViewerFormat format; 960 961 PetscFunctionBegin; 962 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 963 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 964 if (iascii) { 965 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 966 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 967 MatInfo info; 968 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 969 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 970 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 971 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 972 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 973 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 974 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 975 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 976 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 977 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 978 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 979 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 980 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } else if (format == PETSC_VIEWER_ASCII_INFO) { 983 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 986 PetscFunctionReturn(0); 987 } 988 } 989 990 if (isdraw) { 991 PetscDraw draw; 992 PetscBool isnull; 993 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 994 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 995 } 996 997 if (size == 1) { 
998 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 999 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1000 } else { 1001 /* assemble the entire matrix onto first processor. */ 1002 Mat A; 1003 Mat_SeqBAIJ *Aloc; 1004 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1005 MatScalar *a; 1006 1007 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1008 /* Perhaps this should be the type of mat? */ 1009 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1010 if (!rank) { 1011 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1014 } 1015 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1016 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1017 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1018 1019 /* copy over the A part */ 1020 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1021 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1022 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1023 1024 for (i=0; i<mbs; i++) { 1025 rvals[0] = bs*(baij->rstartbs + i); 1026 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1027 for (j=ai[i]; j<ai[i+1]; j++) { 1028 col = (baij->cstartbs+aj[j])*bs; 1029 for (k=0; k<bs; k++) { 1030 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1031 col++; a += bs; 1032 } 1033 } 1034 } 1035 /* copy over the B part */ 1036 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1037 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1038 for (i=0; i<mbs; i++) { 1039 rvals[0] = bs*(baij->rstartbs + i); 1040 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1041 for (j=ai[i]; j<ai[i+1]; j++) { 1042 col = baij->garray[aj[j]]*bs; 1043 for (k=0; k<bs; k++) { 1044 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1045 col++; a += bs; 1046 } 1047 } 1048 } 1049 ierr = PetscFree(rvals);CHKERRQ(ierr); 1050 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1051 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1052 /* 1053 Everyone has to call to draw the matrix since the graphics waits are 1054 synchronized across all processors that share the PetscDraw object 1055 */ 1056 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1057 if (!rank) { 1058 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1059 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1060 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1061 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1062 } 1063 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1064 ierr = MatDestroy(&A);CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1072 { 1073 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1074 Mat_SeqBAIJ* A = (Mat_SeqBAIJ*)a->A->data; 1075 Mat_SeqBAIJ* B = (Mat_SeqBAIJ*)a->B->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1078 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1079 int fd; 1080 PetscScalar *column_values; 1081 FILE *file; 1082 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1083 PetscInt message_count,flowcontrolcount; 1084 1085 PetscFunctionBegin; 1086 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1087 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1088 nz = bs2*(A->nz + B->nz); 1089 rlen = mat->rmap->n; 1090 if (!rank) { 1091 header[0] = MAT_FILE_CLASSID; 1092 header[1] = mat->rmap->N; 1093 header[2] = 
mat->cmap->N; 1094 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1095 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1096 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1097 /* get largest number of rows any processor has */ 1098 range = mat->rmap->range; 1099 for (i=1; i<size; i++) { 1100 rlen = PetscMax(rlen,range[i+1] - range[i]); 1101 } 1102 } else { 1103 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1104 } 1105 1106 ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr); 1107 /* compute lengths of each row */ 1108 for (i=0; i<a->mbs; i++) { 1109 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1110 } 1111 /* store the row lengths to the file */ 1112 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1113 if (!rank) { 1114 MPI_Status status; 1115 ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr); 1116 rlen = (range[1] - range[0])/bs; 1117 for (i=0; i<rlen; i++) { 1118 for (j=0; j<bs; j++) { 1119 row_lens[i*bs+j] = bs*crow_lens[i]; 1120 } 1121 } 1122 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1123 for (i=1; i<size; i++) { 1124 rlen = (range[i+1] - range[i])/bs; 1125 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1126 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1127 for (k=0; k<rlen; k++) { 1128 for (j=0; j<bs; j++) { 1129 row_lens[k*bs+j] = bs*crow_lens[k]; 1130 } 1131 } 1132 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1133 } 1134 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1135 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1138 ierr = 
MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1139 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1140 } 1141 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1142 1143 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1144 information needed to make it for each row from a block row. This does require more communication but still not more than 1145 the communication needed for the nonzero values */ 1146 nzmax = nz; /* space a largest processor needs */ 1147 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1148 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1149 cnt = 0; 1150 for (i=0; i<a->mbs; i++) { 1151 pcnt = cnt; 1152 for (j=B->i[i]; j<B->i[i+1]; j++) { 1153 if ( (col = garray[B->j[j]]) > cstart) break; 1154 for (l=0; l<bs; l++) { 1155 column_indices[cnt++] = bs*col+l; 1156 } 1157 } 1158 for (k=A->i[i]; k<A->i[i+1]; k++) { 1159 for (l=0; l<bs; l++) { 1160 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1161 } 1162 } 1163 for (; j<B->i[i+1]; j++) { 1164 for (l=0; l<bs; l++) { 1165 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1166 } 1167 } 1168 len = cnt - pcnt; 1169 for (k=1; k<bs; k++) { 1170 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1171 cnt += len; 1172 } 1173 } 1174 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1175 1176 /* store the columns to the file */ 1177 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1178 if (!rank) { 1179 MPI_Status status; 1180 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1181 for (i=1; i<size; i++) { 1182 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1183 ierr = 
MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1184 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1185 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 } 1187 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1188 } else { 1189 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1190 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1191 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1192 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1193 } 1194 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1195 1196 /* load up the numerical values */ 1197 ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1198 cnt = 0; 1199 for (i=0; i<a->mbs; i++) { 1200 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1201 for (j=B->i[i]; j<B->i[i+1]; j++) { 1202 if ( garray[B->j[j]] > cstart) break; 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 for (; j<B->i[i+1]; j++) { 1219 for (l=0; l<bs; l++) { 1220 for (ll=0; ll<bs; ll++) { 1221 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1222 } 1223 } 1224 cnt += bs; 1225 } 1226 cnt += (bs-1)*rlen; 1227 } 1228 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1229 1230 /* store the column values to the file */ 1231 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1232 if (!rank) { 1233 MPI_Status status; 1234 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1237 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1238 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1245 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_values);CHKERRQ(ierr); 1249 1250 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1251 if (file) { 1252 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1253 } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatView_MPIBAIJ" 1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1260 { 1261 PetscErrorCode ierr; 1262 PetscBool iascii,isdraw,issocket,isbinary; 1263 1264 PetscFunctionBegin; 1265 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1266 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1267 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1268 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 if (iascii || isdraw || issocket) { 1270 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1271 } else if (isbinary) { 1272 ierr = 
MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1282 { 1283 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 #if defined(PETSC_USE_LOG) 1288 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1289 #endif 1290 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1291 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1292 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1293 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1294 #if defined (PETSC_USE_CTABLE) 1295 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1296 #else 1297 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1302 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1303 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1304 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1305 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1306 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1307 1308 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1312 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1313 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr); 1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatMult_MPIBAIJ" 1323 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1324 { 1325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 PetscInt nt; 1328 1329 PetscFunctionBegin; 1330 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1331 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1332 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1333 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1334 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1343 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1344 { 1345 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1346 PetscErrorCode ierr; 1347 1348 PetscFunctionBegin; 1349 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1351 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1358 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1359 { 1360 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1361 PetscErrorCode ierr; 1362 PetscBool merged; 1363 1364 PetscFunctionBegin; 1365 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1366 /* do nondiagonal part */ 1367 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1368 if (!merged) { 1369 /* send it on its way */ 1370 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* do local part */ 1372 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1373 /* receive remote parts: note this assumes the values are not actually */ 1374 /* inserted in yy until the next line */ 1375 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1376 } else { 1377 /* do local part */ 1378 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1379 /* send it on its way */ 1380 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1381 /* values actually were received in the Begin() but we need to call this nop */ 1382 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1389 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1390 { 1391 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 /* do nondiagonal part */ 1396 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1397 /* send it on its way */ 1398 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1399 /* do local part */ 1400 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1401 /* receive remote parts: note this assumes the values are not actually */ 1402 /* inserted in yy until the next line, which is true for my implementation*/ 1403 /* but is not perhaps always true. */ 1404 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 /* 1409 This only works correctly for square matrices where the subblock A->A is the 1410 diagonal block 1411 */ 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1414 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1415 { 1416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1421 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatScale_MPIBAIJ" 1427 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1428 { 1429 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1430 PetscErrorCode ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1434 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1440 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1441 { 1442 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1443 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1444 PetscErrorCode ierr; 1445 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1446 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1447 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1448 1449 
PetscFunctionBegin; 1450 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1451 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1452 mat->getrowactive = PETSC_TRUE; 1453 1454 if (!mat->rowvalues && (idx || v)) { 1455 /* 1456 allocate enough space to hold information from the longest row. 1457 */ 1458 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1459 PetscInt max = 1,mbs = mat->mbs,tmp; 1460 for (i=0; i<mbs; i++) { 1461 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1462 if (max < tmp) { max = tmp; } 1463 } 1464 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1465 } 1466 lrow = row - brstart; 1467 1468 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1469 if (!v) {pvA = 0; pvB = 0;} 1470 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1471 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1473 nztot = nzA + nzB; 1474 1475 cmap = mat->garray; 1476 if (v || idx) { 1477 if (nztot) { 1478 /* Sort by increasing column numbers, assuming A and B already sorted */ 1479 PetscInt imark = -1; 1480 if (v) { 1481 *v = v_p = mat->rowvalues; 1482 for (i=0; i<nzB; i++) { 1483 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1484 else break; 1485 } 1486 imark = i; 1487 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1488 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1489 } 1490 if (idx) { 1491 *idx = idx_p = mat->rowindices; 1492 if (imark > -1) { 1493 for (i=0; i<imark; i++) { 1494 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1495 } 1496 } else { 1497 for (i=0; i<nzB; i++) { 1498 if (cmap[cworkB[i]/bs] < cstart) 1499 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1500 else break; 1501 } 1502 imark = i; 1503 } 1504 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1505 for 
(i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1506 } 1507 } else { 1508 if (idx) *idx = 0; 1509 if (v) *v = 0; 1510 } 1511 } 1512 *nz = nztot; 1513 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1514 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1520 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1523 1524 PetscFunctionBegin; 1525 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1526 baij->getrowactive = PETSC_FALSE; 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1533 { 1534 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1539 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1546 { 1547 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1548 Mat A = a->A,B = a->B; 1549 PetscErrorCode ierr; 1550 PetscReal isend[5],irecv[5]; 1551 1552 PetscFunctionBegin; 1553 info->block_size = (PetscReal)matin->rmap->bs; 1554 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1555 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1556 isend[3] = info->memory; isend[4] = info->mallocs; 1557 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1558 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1559 isend[3] += info->memory; isend[4] += info->mallocs; 1560 if (flag == 
MAT_LOCAL) { 1561 info->nz_used = isend[0]; 1562 info->nz_allocated = isend[1]; 1563 info->nz_unneeded = isend[2]; 1564 info->memory = isend[3]; 1565 info->mallocs = isend[4]; 1566 } else if (flag == MAT_GLOBAL_MAX) { 1567 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1568 info->nz_used = irecv[0]; 1569 info->nz_allocated = irecv[1]; 1570 info->nz_unneeded = irecv[2]; 1571 info->memory = irecv[3]; 1572 info->mallocs = irecv[4]; 1573 } else if (flag == MAT_GLOBAL_SUM) { 1574 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1575 info->nz_used = irecv[0]; 1576 info->nz_allocated = irecv[1]; 1577 info->nz_unneeded = irecv[2]; 1578 info->memory = irecv[3]; 1579 info->mallocs = irecv[4]; 1580 } else { 1581 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1582 } 1583 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1584 info->fill_ratio_needed = 0; 1585 info->factor_mallocs = 0; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1591 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1592 { 1593 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 switch (op) { 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1600 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1601 case MAT_KEEP_NONZERO_PATTERN: 1602 case MAT_NEW_NONZERO_LOCATION_ERR: 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_ROW_ORIENTED: 1607 a->roworiented = flg; 1608 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1609 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1610 break; 1611 case MAT_NEW_DIAGONALS: 1612 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1613 break; 1614 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1615 a->donotstash = flg; 1616 break; 1617 case MAT_USE_HASH_TABLE: 1618 a->ht_flag = flg; 1619 break; 1620 case MAT_SYMMETRIC: 1621 case MAT_STRUCTURALLY_SYMMETRIC: 1622 case MAT_HERMITIAN: 1623 case MAT_SYMMETRY_ETERNAL: 1624 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1625 break; 1626 default: 1627 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1634 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1635 { 1636 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1637 Mat_SeqBAIJ *Aloc; 1638 Mat B; 1639 PetscErrorCode ierr; 1640 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1641 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1642 MatScalar *a; 1643 1644 PetscFunctionBegin; 1645 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1646 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1647 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1648 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1649 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1650 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1651 } else { 1652 B = *matout; 1653 } 1654 1655 /* copy over the A part */ 1656 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1657 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1658 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1659 1660 for (i=0; i<mbs; i++) { 1661 rvals[0] = bs*(baij->rstartbs + i); 1662 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1663 for (j=ai[i]; j<ai[i+1]; j++) { 1664 col = (baij->cstartbs+aj[j])*bs; 1665 for (k=0; k<bs; k++) { 1666 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1667 col++; a += bs; 1668 } 1669 } 1670 } 1671 /* copy over the B part */ 1672 
Aloc = (Mat_SeqBAIJ*)baij->B->data; 1673 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1674 for (i=0; i<mbs; i++) { 1675 rvals[0] = bs*(baij->rstartbs + i); 1676 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 col = baij->garray[aj[j]]*bs; 1679 for (k=0; k<bs; k++) { 1680 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1681 col++; a += bs; 1682 } 1683 } 1684 } 1685 ierr = PetscFree(rvals);CHKERRQ(ierr); 1686 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 1689 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1690 *matout = B; 1691 } else { 1692 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1699 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1700 { 1701 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1702 Mat a = baij->A,b = baij->B; 1703 PetscErrorCode ierr; 1704 PetscInt s1,s2,s3; 1705 1706 PetscFunctionBegin; 1707 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1708 if (rr) { 1709 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1710 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1711 /* Overlap communication with computation. 
*/ 1712 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 if (ll) { 1715 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1716 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1717 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1718 } 1719 /* scale the diagonal block */ 1720 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1721 1722 if (rr) { 1723 /* Do a scatter end and then right scale the off-diagonal block */ 1724 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1725 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1726 } 1727 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1733 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1734 { 1735 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1738 PetscInt i,*owners = A->rmap->range; 1739 PetscInt *nprocs,j,idx,nsends,row; 1740 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1741 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1742 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1743 MPI_Comm comm = ((PetscObject)A)->comm; 1744 MPI_Request *send_waits,*recv_waits; 1745 MPI_Status recv_status,*send_status; 1746 const PetscScalar *xx; 1747 PetscScalar *bb; 1748 #if defined(PETSC_DEBUG) 1749 PetscBool found = PETSC_FALSE; 1750 #endif 1751 1752 PetscFunctionBegin; 1753 /* first count number of contributors to each processor */ 1754 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1755 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1756 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1757 j = 0; 1758 for (i=0; i<N; i++) { 1759 if (lastidx 
> (idx = rows[i])) j = 0; 1760 lastidx = idx; 1761 for (; j<size; j++) { 1762 if (idx >= owners[j] && idx < owners[j+1]) { 1763 nprocs[2*j]++; 1764 nprocs[2*j+1] = 1; 1765 owner[i] = j; 1766 #if defined(PETSC_DEBUG) 1767 found = PETSC_TRUE; 1768 #endif 1769 break; 1770 } 1771 } 1772 #if defined(PETSC_DEBUG) 1773 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1774 found = PETSC_FALSE; 1775 #endif 1776 } 1777 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1778 1779 if (A->nooffproczerorows) { 1780 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1781 nrecvs = nsends; 1782 nmax = N; 1783 } else { 1784 /* inform other processors of number of messages and max length*/ 1785 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1786 } 1787 1788 /* post receives: */ 1789 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1790 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1791 for (i=0; i<nrecvs; i++) { 1792 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1793 } 1794 1795 /* do sends: 1796 1) starts[i] gives the starting index in svalues for stuff going to 1797 the ith processor 1798 */ 1799 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1800 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1801 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1802 starts[0] = 0; 1803 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1804 for (i=0; i<N; i++) { 1805 svalues[starts[owner[i]]++] = rows[i]; 1806 } 1807 1808 starts[0] = 0; 1809 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1810 count = 0; 1811 for (i=0; i<size; i++) { 1812 if (nprocs[2*i+1]) { 1813 ierr = 
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1814 } 1815 } 1816 ierr = PetscFree(starts);CHKERRQ(ierr); 1817 1818 base = owners[rank]; 1819 1820 /* wait on receives */ 1821 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1822 count = nrecvs; 1823 slen = 0; 1824 while (count) { 1825 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1826 /* unpack receives into our local space */ 1827 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1828 source[imdex] = recv_status.MPI_SOURCE; 1829 lens[imdex] = n; 1830 slen += n; 1831 count--; 1832 } 1833 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1834 1835 /* move the data into the send scatter */ 1836 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1837 count = 0; 1838 for (i=0; i<nrecvs; i++) { 1839 values = rvalues + i*nmax; 1840 for (j=0; j<lens[i]; j++) { 1841 lrows[count++] = values[j] - base; 1842 } 1843 } 1844 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1845 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1846 ierr = PetscFree(owner);CHKERRQ(ierr); 1847 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1848 1849 /* fix right hand side if needed */ 1850 if (x && b) { 1851 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1852 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1853 for (i=0; i<slen; i++) { 1854 bb[lrows[i]] = diag*xx[lrows[i]]; 1855 } 1856 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1857 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1858 } 1859 1860 /* actually zap the local rows */ 1861 /* 1862 Zero the required rows. If the "diagonal block" of the matrix 1863 is square and the user wishes to set the diagonal we use separate 1864 code so that MatSetValues() is not called for each diagonal allocating 1865 new memory, thus calling lots of mallocs and slowing things down. 
1866 1867 */ 1868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1869 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1872 } else if (diag != 0.0) { 1873 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1874 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1875 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1876 for (i=0; i<slen; i++) { 1877 row = lrows[i] + rstart_bs; 1878 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1879 } 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1884 } 1885 1886 ierr = PetscFree(lrows);CHKERRQ(ierr); 1887 1888 /* wait on sends */ 1889 if (nsends) { 1890 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1891 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1892 ierr = PetscFree(send_status);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1895 ierr = PetscFree(svalues);CHKERRQ(ierr); 1896 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1902 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1903 { 1904 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatEqual_MPIBAIJ" 1916 PetscErrorCode MatEqual_MPIBAIJ(Mat 
A,Mat B,PetscBool *flag) 1917 { 1918 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1919 Mat a,b,c,d; 1920 PetscBool flg; 1921 PetscErrorCode ierr; 1922 1923 PetscFunctionBegin; 1924 a = matA->A; b = matA->B; 1925 c = matB->A; d = matB->B; 1926 1927 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1928 if (flg) { 1929 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1930 } 1931 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_MPIBAIJ" 1937 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1941 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1942 1943 PetscFunctionBegin; 1944 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1945 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1946 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1947 } else { 1948 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1949 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1956 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1957 { 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1967 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1968 { 1969 PetscErrorCode ierr; 1970 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1971 PetscBLASInt bnz,one=1; 1972 Mat_SeqBAIJ *x,*y; 1973 1974 PetscFunctionBegin; 1975 if (str == SAME_NONZERO_PATTERN) { 1976 PetscScalar alpha = a; 1977 x = (Mat_SeqBAIJ *)xx->A->data; 1978 y = 
(Mat_SeqBAIJ *)yy->A->data; 1979 bnz = PetscBLASIntCast(x->nz); 1980 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1981 x = (Mat_SeqBAIJ *)xx->B->data; 1982 y = (Mat_SeqBAIJ *)yy->B->data; 1983 bnz = PetscBLASIntCast(x->nz); 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else { 1986 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 #undef __FUNCT__ 1992 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1993 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1994 { 1995 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1996 PetscInt rbs,cbs; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 2001 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2002 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2003 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2004 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2005 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2011 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2012 { 2013 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2018 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2024 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2025 { 2026 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2027 PetscErrorCode ierr; 2028 2029 PetscFunctionBegin; 2030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2031 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2037 PetscErrorCode 
MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2038 { 2039 PetscErrorCode ierr; 2040 IS iscol_local; 2041 PetscInt csize; 2042 2043 PetscFunctionBegin; 2044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2045 if (call == MAT_REUSE_MATRIX) { 2046 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2047 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2048 } else { 2049 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2050 } 2051 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2052 if (call == MAT_INITIAL_MATRIX) { 2053 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2054 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2055 } 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2061 /* 2062 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2063 in local and then by concatenating the local matrices the end result. 
2064 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2065 */ 2066 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2067 { 2068 PetscErrorCode ierr; 2069 PetscMPIInt rank,size; 2070 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2071 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2072 Mat *local,M,Mreuse; 2073 MatScalar *vwork,*aa; 2074 MPI_Comm comm = ((PetscObject)mat)->comm; 2075 Mat_SeqBAIJ *aij; 2076 2077 2078 PetscFunctionBegin; 2079 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2081 2082 if (call == MAT_REUSE_MATRIX) { 2083 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2084 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2085 local = &Mreuse; 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2089 Mreuse = *local; 2090 ierr = PetscFree(local);CHKERRQ(ierr); 2091 } 2092 2093 /* 2094 m - number of local rows 2095 n - number of columns (same on all processors) 2096 rstart - first row in new global matrix generated 2097 */ 2098 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2099 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2100 m = m/bs; 2101 n = n/bs; 2102 2103 if (call == MAT_INITIAL_MATRIX) { 2104 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2105 ii = aij->i; 2106 jj = aij->j; 2107 2108 /* 2109 Determine the number of non-zeros in the diagonal and off-diagonal 2110 portions of the matrix in order to do correct preallocation 2111 */ 2112 2113 /* first get start and end of "diagonal" columns */ 2114 if (csize == PETSC_DECIDE) { 2115 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2116 if (mglobal == n*bs) { /* square matrix */ 2117 nlocal = m; 2118 } else 
{ 2119 nlocal = n/size + ((n % size) > rank); 2120 } 2121 } else { 2122 nlocal = csize/bs; 2123 } 2124 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2125 rstart = rend - nlocal; 2126 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2127 2128 /* next, compute all the lengths */ 2129 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2130 olens = dlens + m; 2131 for (i=0; i<m; i++) { 2132 jend = ii[i+1] - ii[i]; 2133 olen = 0; 2134 dlen = 0; 2135 for (j=0; j<jend; j++) { 2136 if (*jj < rstart || *jj >= rend) olen++; 2137 else dlen++; 2138 jj++; 2139 } 2140 olens[i] = olen; 2141 dlens[i] = dlen; 2142 } 2143 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2144 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2145 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2146 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2147 ierr = PetscFree(dlens);CHKERRQ(ierr); 2148 } else { 2149 PetscInt ml,nl; 2150 2151 M = *newmat; 2152 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2153 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2154 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2155 /* 2156 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2157 rather than the slower MatSetValues(). 
2158 */ 2159 M->was_assembled = PETSC_TRUE; 2160 M->assembled = PETSC_FALSE; 2161 } 2162 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2163 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2164 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 aa = aij->a; 2168 for (i=0; i<m; i++) { 2169 row = rstart/bs + i; 2170 nz = ii[i+1] - ii[i]; 2171 cwork = jj; jj += nz; 2172 vwork = aa; aa += nz; 2173 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 2176 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 *newmat = M; 2179 2180 /* save submatrix used in processor for next request */ 2181 if (call == MAT_INITIAL_MATRIX) { 2182 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2183 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2184 } 2185 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatPermute_MPIBAIJ" 2191 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2192 { 2193 MPI_Comm comm,pcomm; 2194 PetscInt first,local_size,nrows; 2195 const PetscInt *rows; 2196 PetscMPIInt size; 2197 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2202 /* make a collective version of 'rowp' */ 2203 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2204 if (pcomm==comm) { 2205 crowp = rowp; 2206 } else { 2207 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2208 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2209 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2211 } 2212 /* collect the global row permutation and invert it */ 2213 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2214 
ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2215 if (pcomm!=comm) { 2216 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2217 } 2218 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2219 /* get the local target indices */ 2220 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2221 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2222 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2224 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2225 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 2226 /* the column permutation is so much easier; 2227 make a local version of 'colp' and invert it */ 2228 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2229 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2230 if (size==1) { 2231 lcolp = colp; 2232 } else { 2233 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2234 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2235 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2236 } 2237 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2238 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2239 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2240 if (size>1) { 2241 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2242 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2243 } 2244 /* now we just get the submatrix */ 2245 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2246 /* clean up */ 2247 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 2248 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2254 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2255 { 2256 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2257 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2258 2259 
PetscFunctionBegin; 2260 if (nghosts) { *nghosts = B->nbs;} 2261 if (ghosts) {*ghosts = baij->garray;} 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2269 /* 2270 This routine is almost identical to MatFDColoringCreate_MPIBAIJ()! 2271 */ 2272 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2273 { 2274 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2275 PetscErrorCode ierr; 2276 PetscMPIInt size,*ncolsonproc,*disp,nn; 2277 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2278 const PetscInt *is; 2279 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2280 PetscInt *rowhit,M,cstart,cend,colb; 2281 PetscInt *columnsforrow,l; 2282 IS *isa; 2283 PetscBool done,flg; 2284 ISLocalToGlobalMapping map = mat->cmap->bmapping; 2285 PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2286 2287 PetscFunctionBegin; 2288 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2289 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2290 2291 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2292 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2293 M = mat->rmap->n/bs; 2294 cstart = mat->cmap->rstart/bs; 2295 cend = mat->cmap->rend/bs; 2296 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2297 c->N = mat->cmap->N/bs; 2298 c->m = mat->rmap->n/bs; 2299 c->rstart = mat->rmap->rstart/bs; 2300 2301 c->ncolors = nis; 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2304 ierr = 
PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2305 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2306 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2307 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2308 2309 /* Allow access to data structures of local part of matrix */ 2310 if (!baij->colmap) { 2311 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2312 } 2313 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2314 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2315 2316 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2317 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2318 2319 for (i=0; i<nis; i++) { 2320 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2321 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2322 c->ncolumns[i] = n; 2323 if (n) { 2324 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2325 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2326 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2327 } else { 2328 c->columns[i] = 0; 2329 } 2330 2331 if (ctype == IS_COLORING_GLOBAL){ 2332 /* Determine the total (parallel) number of columns of this color */ 2333 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2334 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2335 2336 nn = PetscMPIIntCast(n); 2337 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2338 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2339 if (!nctot) { 2340 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2341 } 2342 2343 disp[0] = 0; 2344 for (j=1; j<size; j++) { 2345 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2346 } 2347 2348 
/* Get complete list of columns for color on each processor */ 2349 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2350 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2351 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2352 } else if (ctype == IS_COLORING_GHOSTED){ 2353 /* Determine local number of columns of this color on this process, including ghost points */ 2354 nctot = n; 2355 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2356 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2357 } else { 2358 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2359 } 2360 2361 /* 2362 Mark all rows affect by these columns 2363 */ 2364 /* Temporary option to allow for debugging/testing */ 2365 flg = PETSC_FALSE; 2366 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2367 if (!flg) {/*-----------------------------------------------------------------------------*/ 2368 /* crude, fast version */ 2369 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2370 /* loop over columns*/ 2371 for (j=0; j<nctot; j++) { 2372 if (ctype == IS_COLORING_GHOSTED) { 2373 col = ltog[cols[j]]; 2374 } else { 2375 col = cols[j]; 2376 } 2377 if (col >= cstart && col < cend) { 2378 /* column is in diagonal block of matrix */ 2379 rows = A_cj + A_ci[col-cstart]; 2380 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2381 } else { 2382 #if defined (PETSC_USE_CTABLE) 2383 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2384 colb --; 2385 #else 2386 colb = baij->colmap[col] - 1; 2387 #endif 2388 if (colb == -1) { 2389 m = 0; 2390 } else { 2391 colb = colb/bs; 2392 rows = B_cj + B_ci[colb]; 2393 m = B_ci[colb+1] - B_ci[colb]; 2394 } 2395 } 2396 /* loop over columns marking them in rowhit */ 2397 for (k=0; k<m; k++) { 2398 rowhit[*rows++] = col + 1; 2399 } 2400 } 2401 2402 /* count the number 
of hits */ 2403 nrows = 0; 2404 for (j=0; j<M; j++) { 2405 if (rowhit[j]) nrows++; 2406 } 2407 c->nrows[i] = nrows; 2408 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2409 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2410 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2411 nrows = 0; 2412 for (j=0; j<M; j++) { 2413 if (rowhit[j]) { 2414 c->rows[i][nrows] = j; 2415 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2416 nrows++; 2417 } 2418 } 2419 } else {/*-------------------------------------------------------------------------------*/ 2420 /* slow version, using rowhit as a linked list */ 2421 PetscInt currentcol,fm,mfm; 2422 rowhit[M] = M; 2423 nrows = 0; 2424 /* loop over columns*/ 2425 for (j=0; j<nctot; j++) { 2426 if (ctype == IS_COLORING_GHOSTED) { 2427 col = ltog[cols[j]]; 2428 } else { 2429 col = cols[j]; 2430 } 2431 if (col >= cstart && col < cend) { 2432 /* column is in diagonal block of matrix */ 2433 rows = A_cj + A_ci[col-cstart]; 2434 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2435 } else { 2436 #if defined (PETSC_USE_CTABLE) 2437 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2438 colb --; 2439 #else 2440 colb = baij->colmap[col] - 1; 2441 #endif 2442 if (colb == -1) { 2443 m = 0; 2444 } else { 2445 colb = colb/bs; 2446 rows = B_cj + B_ci[colb]; 2447 m = B_ci[colb+1] - B_ci[colb]; 2448 } 2449 } 2450 2451 /* loop over columns marking them in rowhit */ 2452 fm = M; /* fm points to first entry in linked list */ 2453 for (k=0; k<m; k++) { 2454 currentcol = *rows++; 2455 /* is it already in the list? 
*/ 2456 do { 2457 mfm = fm; 2458 fm = rowhit[fm]; 2459 } while (fm < currentcol); 2460 /* not in list so add it */ 2461 if (fm != currentcol) { 2462 nrows++; 2463 columnsforrow[currentcol] = col; 2464 /* next three lines insert new entry into linked list */ 2465 rowhit[mfm] = currentcol; 2466 rowhit[currentcol] = fm; 2467 fm = currentcol; 2468 /* fm points to present position in list since we know the columns are sorted */ 2469 } else { 2470 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2471 } 2472 } 2473 } 2474 c->nrows[i] = nrows; 2475 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2477 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2478 /* now store the linked list of rows into c->rows[i] */ 2479 nrows = 0; 2480 fm = rowhit[M]; 2481 do { 2482 c->rows[i][nrows] = fm; 2483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2484 fm = rowhit[fm]; 2485 } while (fm < M); 2486 } /* ---------------------------------------------------------------------------------------*/ 2487 ierr = PetscFree(cols);CHKERRQ(ierr); 2488 } 2489 2490 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2491 /* 2492 vscale will contain the "diagonal" on processor scalings followed by the off processor 2493 */ 2494 if (ctype == IS_COLORING_GLOBAL) { 2495 PetscInt *garray; 2496 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2497 for (i=0; i<baij->B->cmap->n/bs; i++) { 2498 for (j=0; j<bs; j++) { 2499 garray[i*bs+j] = bs*baij->garray[i]+j; 2500 } 2501 } 2502 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2503 ierr = PetscFree(garray);CHKERRQ(ierr); 2504 CHKMEMQ; 2505 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2506 for (k=0; k<c->ncolors; k++) { 2507 ierr = 
PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2508 for (l=0; l<c->nrows[k]; l++) { 2509 col = c->columnsforrow[k][l]; 2510 if (col >= cstart && col < cend) { 2511 /* column is in diagonal block of matrix */ 2512 colb = col - cstart; 2513 } else { 2514 /* column is in "off-processor" part */ 2515 #if defined (PETSC_USE_CTABLE) 2516 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2517 colb --; 2518 #else 2519 colb = baij->colmap[col] - 1; 2520 #endif 2521 colb = colb/bs; 2522 colb += cend - cstart; 2523 } 2524 c->vscaleforrow[k][l] = colb; 2525 } 2526 } 2527 } else if (ctype == IS_COLORING_GHOSTED) { 2528 /* Get gtol mapping */ 2529 PetscInt N = mat->cmap->N, *gtol; 2530 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 2531 for (i=0; i<N; i++) gtol[i] = -1; 2532 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2533 2534 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2535 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2536 for (k=0; k<c->ncolors; k++) { 2537 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2538 for (l=0; l<c->nrows[k]; l++) { 2539 col = c->columnsforrow[k][l]; /* global column index */ 2540 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2541 } 2542 } 2543 ierr = PetscFree(gtol);CHKERRQ(ierr); 2544 } 2545 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2546 2547 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2548 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2549 ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2550 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2551 CHKMEMQ; 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2557 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2558 { 2559 Mat B; 
2560 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2561 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2562 Mat_SeqAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2565 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2566 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2567 2568 PetscFunctionBegin; 2569 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2570 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2571 2572 /* ---------------------------------------------------------------- 2573 Tell every processor the number of nonzeros per row 2574 */ 2575 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2576 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2577 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2578 } 2579 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2580 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2581 displs = recvcounts + size; 2582 for (i=0; i<size; i++) { 2583 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2584 displs[i] = A->rmap->range[i]/bs; 2585 } 2586 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2587 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #else 2589 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2590 #endif 2591 /* --------------------------------------------------------------- 2592 Create the sequential matrix of the same type as the local block diagonal 2593 */ 2594 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2596 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 
2597 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2598 b = (Mat_SeqAIJ *)B->data; 2599 2600 /*-------------------------------------------------------------------- 2601 Copy my part of matrix column indices over 2602 */ 2603 sendcount = ad->nz + bd->nz; 2604 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2605 a_jsendbuf = ad->j; 2606 b_jsendbuf = bd->j; 2607 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2608 cnt = 0; 2609 for (i=0; i<n; i++) { 2610 2611 /* put in lower diagonal portion */ 2612 m = bd->i[i+1] - bd->i[i]; 2613 while (m > 0) { 2614 /* is it above diagonal (in bd (compressed) numbering) */ 2615 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2616 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2617 m--; 2618 } 2619 2620 /* put in diagonal portion */ 2621 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2622 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2623 } 2624 2625 /* put in upper diagonal portion */ 2626 while (m-- > 0) { 2627 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2628 } 2629 } 2630 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2631 2632 /*-------------------------------------------------------------------- 2633 Gather all column indices to all processors 2634 */ 2635 for (i=0; i<size; i++) { 2636 recvcounts[i] = 0; 2637 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2638 recvcounts[i] += lens[j]; 2639 } 2640 } 2641 displs[0] = 0; 2642 for (i=1; i<size; i++) { 2643 displs[i] = displs[i-1] + recvcounts[i-1]; 2644 } 2645 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2646 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #else 2648 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2649 #endif 2650 /*-------------------------------------------------------------------- 2651 Assemble the matrix into useable 
form (note numerical values not yet set) 2652 */ 2653 /* set the b->ilen (length of each row) values */ 2654 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2655 /* set the b->i indices */ 2656 b->i[0] = 0; 2657 for (i=1; i<=A->rmap->N/bs; i++) { 2658 b->i[i] = b->i[i-1] + lens[i-1]; 2659 } 2660 ierr = PetscFree(lens);CHKERRQ(ierr); 2661 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2662 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2663 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2664 2665 if (A->symmetric){ 2666 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->hermitian) { 2668 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2669 } else if (A->structurally_symmetric) { 2670 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2671 } 2672 *newmat = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatSOR_MPIBAIJ" 2678 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2679 { 2680 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2681 PetscErrorCode ierr; 2682 Vec bb1 = 0; 2683 2684 PetscFunctionBegin; 2685 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2686 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2687 } 2688 2689 if (flag == SOR_APPLY_UPPER) { 2690 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2695 if (flag & SOR_ZERO_INITIAL_GUESS) { 2696 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2697 its--; 2698 } 2699 2700 while (its--) { 2701 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2702 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2703 2704 /* 
update rhs: bb1 = bb - B*x */ 2705 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2706 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2707 2708 /* local sweep */ 2709 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2710 } 2711 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2712 if (flag & SOR_ZERO_INITIAL_GUESS) { 2713 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2714 its--; 2715 } 2716 while (its--) { 2717 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2718 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2719 2720 /* update rhs: bb1 = bb - B*x */ 2721 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2722 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2723 2724 /* local sweep */ 2725 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2726 } 2727 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2728 if (flag & SOR_ZERO_INITIAL_GUESS) { 2729 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2730 its--; 2731 } 2732 while (its--) { 2733 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2734 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2735 2736 /* update rhs: bb1 = bb - B*x */ 2737 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2738 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2739 2740 /* local sweep */ 2741 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2742 } 2743 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2744 2745 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 extern PetscErrorCode 
MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2750 2751 2752 /* -------------------------------------------------------------------*/ 2753 static struct _MatOps MatOps_Values = { 2754 MatSetValues_MPIBAIJ, 2755 MatGetRow_MPIBAIJ, 2756 MatRestoreRow_MPIBAIJ, 2757 MatMult_MPIBAIJ, 2758 /* 4*/ MatMultAdd_MPIBAIJ, 2759 MatMultTranspose_MPIBAIJ, 2760 MatMultTransposeAdd_MPIBAIJ, 2761 0, 2762 0, 2763 0, 2764 /*10*/ 0, 2765 0, 2766 0, 2767 MatSOR_MPIBAIJ, 2768 MatTranspose_MPIBAIJ, 2769 /*15*/ MatGetInfo_MPIBAIJ, 2770 MatEqual_MPIBAIJ, 2771 MatGetDiagonal_MPIBAIJ, 2772 MatDiagonalScale_MPIBAIJ, 2773 MatNorm_MPIBAIJ, 2774 /*20*/ MatAssemblyBegin_MPIBAIJ, 2775 MatAssemblyEnd_MPIBAIJ, 2776 MatSetOption_MPIBAIJ, 2777 MatZeroEntries_MPIBAIJ, 2778 /*24*/ MatZeroRows_MPIBAIJ, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*34*/ MatDuplicate_MPIBAIJ, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*39*/ MatAXPY_MPIBAIJ, 2794 MatGetSubMatrices_MPIBAIJ, 2795 MatIncreaseOverlap_MPIBAIJ, 2796 MatGetValues_MPIBAIJ, 2797 MatCopy_MPIBAIJ, 2798 /*44*/ 0, 2799 MatScale_MPIBAIJ, 2800 0, 2801 0, 2802 0, 2803 /*49*/ MatSetBlockSize_MPIBAIJ, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*54*/ MatFDColoringCreate_MPIBAIJ, 2809 0, 2810 MatSetUnfactored_MPIBAIJ, 2811 MatPermute_MPIBAIJ, 2812 MatSetValuesBlocked_MPIBAIJ, 2813 /*59*/ MatGetSubMatrix_MPIBAIJ, 2814 MatDestroy_MPIBAIJ, 2815 MatView_MPIBAIJ, 2816 0, 2817 0, 2818 /*64*/ 0, 2819 0, 2820 0, 2821 0, 2822 0, 2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2824 0, 2825 0, 2826 0, 2827 0, 2828 /*74*/ 0, 2829 MatFDColoringApply_BAIJ, 2830 0, 2831 0, 2832 0, 2833 /*79*/ 0, 2834 0, 2835 0, 2836 0, 2837 MatLoad_MPIBAIJ, 2838 /*84*/ 0, 2839 0, 2840 0, 2841 0, 2842 0, 2843 /*89*/ 0, 2844 0, 2845 0, 2846 0, 2847 0, 2848 /*94*/ 0, 2849 0, 2850 0, 2851 0, 2852 0, 2853 /*99*/ 0, 2854 0, 2855 0, 2856 0, 2857 0, 2858 /*104*/0, 2859 MatRealPart_MPIBAIJ, 2860 MatImaginaryPart_MPIBAIJ, 2861 0, 2862 
0, 2863 /*109*/0, 2864 0, 2865 0, 2866 0, 2867 0, 2868 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2869 0, 2870 MatGetGhosts_MPIBAIJ, 2871 0, 2872 0, 2873 /*119*/0, 2874 0, 2875 0, 2876 0 2877 }; 2878 2879 EXTERN_C_BEGIN 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2882 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2883 { 2884 PetscFunctionBegin; 2885 *a = ((Mat_MPIBAIJ *)A->data)->A; 2886 PetscFunctionReturn(0); 2887 } 2888 EXTERN_C_END 2889 2890 EXTERN_C_BEGIN 2891 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2892 EXTERN_C_END 2893 2894 EXTERN_C_BEGIN 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2897 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2898 { 2899 PetscInt m,rstart,cstart,cend; 2900 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2901 const PetscInt *JJ=0; 2902 PetscScalar *values=0; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 2907 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2908 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2911 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2912 m = B->rmap->n/bs; 2913 rstart = B->rmap->rstart/bs; 2914 cstart = B->cmap->rstart/bs; 2915 cend = B->cmap->rend/bs; 2916 2917 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2918 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2919 for (i=0; i<m; i++) { 2920 nz = ii[i+1] - ii[i]; 2921 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2922 nz_max = PetscMax(nz_max,nz); 2923 JJ = jj + ii[i]; 2924 for (j=0; 
j<nz; j++) { 2925 if (*JJ >= cstart) break; 2926 JJ++; 2927 } 2928 d = 0; 2929 for (; j<nz; j++) { 2930 if (*JJ++ >= cend) break; 2931 d++; 2932 } 2933 d_nnz[i] = d; 2934 o_nnz[i] = nz - d; 2935 } 2936 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2937 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2938 2939 values = (PetscScalar*)V; 2940 if (!values) { 2941 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2942 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 for (i=0; i<m; i++) { 2945 PetscInt row = i + rstart; 2946 PetscInt ncols = ii[i+1] - ii[i]; 2947 const PetscInt *icols = jj + ii[i]; 2948 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2949 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2950 } 2951 2952 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2955 2956 PetscFunctionReturn(0); 2957 } 2958 EXTERN_C_END 2959 2960 #undef __FUNCT__ 2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2962 /*@C 2963 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2964 (the default parallel PETSc format). 2965 2966 Collective on MPI_Comm 2967 2968 Input Parameters: 2969 + A - the matrix 2970 . bs - the block size 2971 . i - the indices into j for the start of each local row (starts with zero) 2972 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2973 - v - optional values in the matrix 2974 2975 Level: developer 2976 2977 .keywords: matrix, aij, compressed row, sparse, parallel 2978 2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2980 @*/ 2981 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2982 { 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 EXTERN_C_BEGIN 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2993 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2994 { 2995 Mat_MPIBAIJ *b; 2996 PetscErrorCode ierr; 2997 PetscInt i, newbs = PetscAbs(bs); 2998 2999 PetscFunctionBegin; 3000 if (bs < 0) { 3001 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3002 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3003 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3004 bs = PetscAbs(bs); 3005 } 3006 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3007 bs = newbs; 3008 3009 3010 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3011 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3012 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3013 if (d_nz < 0) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3014 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3015 3016 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3019 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3020 3021 if (d_nnz) { 3022 for (i=0; i<B->rmap->n/bs; i++) { 3023 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3024 } 3025 } 3026 if (o_nnz) { 3027 for (i=0; i<B->rmap->n/bs; i++) { 3028 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3029 } 3030 } 3031 3032 b = (Mat_MPIBAIJ*)B->data; 3033 b->bs2 = bs*bs; 3034 b->mbs = B->rmap->n/bs; 3035 b->nbs = B->cmap->n/bs; 3036 b->Mbs = B->rmap->N/bs; 3037 b->Nbs = B->cmap->N/bs; 3038 3039 for (i=0; i<=b->size; i++) { 3040 b->rangebs[i] = B->rmap->range[i]/bs; 3041 } 3042 b->rstartbs = B->rmap->rstart/bs; 3043 b->rendbs = B->rmap->rend/bs; 3044 b->cstartbs = B->cmap->rstart/bs; 3045 b->cendbs = B->cmap->rend/bs; 3046 3047 if (!B->preallocated) { 3048 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3049 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3050 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3051 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3052 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3053 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3054 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3055 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3056 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3057 } 3058 3059 ierr = 
MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3060 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3061 B->preallocated = PETSC_TRUE; 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 EXTERN_C_BEGIN 3067 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3068 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3069 EXTERN_C_END 3070 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3075 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3076 { 3077 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3078 PetscErrorCode ierr; 3079 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3080 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3081 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3082 3083 PetscFunctionBegin; 3084 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3085 ii[0] = 0; 3086 CHKMEMQ; 3087 for (i=0; i<M; i++) { 3088 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3089 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3090 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3091 /* remove one from count of matrix has diagonal */ 3092 for (j=id[i]; j<id[i+1]; j++) { 3093 if (jd[j] == i) {ii[i+1]--;break;} 3094 } 3095 CHKMEMQ; 3096 } 3097 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3098 cnt = 0; 3099 for (i=0; i<M; i++) { 3100 for (j=io[i]; j<io[i+1]; j++) { 3101 if (garray[jo[j]] > rstart) break; 3102 jj[cnt++] = garray[jo[j]]; 3103 CHKMEMQ; 3104 } 3105 for (k=id[i]; k<id[i+1]; k++) { 3106 if (jd[k] != i) { 3107 jj[cnt++] = rstart + jd[k]; 3108 CHKMEMQ; 3109 } 3110 } 3111 for (;j<io[i+1]; j++) { 3112 jj[cnt++] = garray[jo[j]]; 3113 CHKMEMQ; 3114 } 
3115 } 3116 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 EXTERN_C_END 3120 3121 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3122 EXTERN_C_BEGIN 3123 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3124 EXTERN_C_END 3125 3126 EXTERN_C_BEGIN 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3129 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3133 Mat B; 3134 Mat_MPIAIJ *b; 3135 3136 PetscFunctionBegin; 3137 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3138 3139 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3140 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3141 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3142 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3143 b = (Mat_MPIAIJ*) B->data; 3144 3145 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3146 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3147 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3149 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3150 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 if (reuse == MAT_REUSE_MATRIX) { 3155 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3156 } else { 3157 *newmat = B; 3158 } 3159 PetscFunctionReturn(0); 3160 } 3161 EXTERN_C_END 3162 3163 EXTERN_C_BEGIN 3164 #if defined(PETSC_HAVE_MUMPS) 3165 extern PetscErrorCode 
MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3166 #endif 3167 EXTERN_C_END 3168 3169 /*MC 3170 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3171 3172 Options Database Keys: 3173 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3174 . -mat_block_size <bs> - set the blocksize used to store the matrix 3175 - -mat_use_hash_table <fact> 3176 3177 Level: beginner 3178 3179 .seealso: MatCreateMPIBAIJ 3180 M*/ 3181 3182 EXTERN_C_BEGIN 3183 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*); 3184 EXTERN_C_END 3185 3186 EXTERN_C_BEGIN 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "MatCreate_MPIBAIJ" 3189 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3190 { 3191 Mat_MPIBAIJ *b; 3192 PetscErrorCode ierr; 3193 PetscBool flg; 3194 3195 PetscFunctionBegin; 3196 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3197 B->data = (void*)b; 3198 3199 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3200 B->assembled = PETSC_FALSE; 3201 3202 B->insertmode = NOT_SET_VALUES; 3203 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3204 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3205 3206 /* build local table of row and column ownerships */ 3207 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3208 3209 /* build cache for off array entries formed */ 3210 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3211 b->donotstash = PETSC_FALSE; 3212 b->colmap = PETSC_NULL; 3213 b->garray = PETSC_NULL; 3214 b->roworiented = PETSC_TRUE; 3215 3216 /* stuff used in block assembly */ 3217 b->barray = 0; 3218 3219 /* stuff used for matrix vector multiply */ 3220 b->lvec = 0; 3221 b->Mvctx = 0; 3222 3223 /* stuff for MatGetRow() */ 3224 b->rowindices = 0; 3225 b->rowvalues = 0; 3226 b->getrowactive = PETSC_FALSE; 3227 3228 /* hash table stuff */ 
3229 b->ht = 0; 3230 b->hd = 0; 3231 b->ht_size = 0; 3232 b->ht_flag = PETSC_FALSE; 3233 b->ht_fact = 0; 3234 b->ht_total_ct = 0; 3235 b->ht_insert_ct = 0; 3236 3237 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3238 b->ijonly = PETSC_FALSE; 3239 3240 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3241 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3242 if (flg) { 3243 PetscReal fact = 1.39; 3244 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3245 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3246 if (fact <= 1.0) fact = 1.39; 3247 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3248 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3249 } 3250 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3251 3252 #if defined(PETSC_HAVE_MUMPS) 3253 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3254 #endif 3255 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3256 "MatConvert_MPIBAIJ_MPIAdj", 3257 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3259 "MatConvert_MPIBAIJ_MPIAIJ", 3260 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3261 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C", 3262 "MatConvert_MPIBAIJ_MPISBAIJ", 3263 MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3264 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3265 "MatStoreValues_MPIBAIJ", 3266 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3267 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 
3268 "MatRetrieveValues_MPIBAIJ", 3269 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3270 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3271 "MatGetDiagonalBlock_MPIBAIJ", 3272 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3273 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3274 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3275 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3277 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3278 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3280 "MatDiagonalScaleLocal_MPIBAIJ", 3281 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3282 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3283 "MatSetHashTableFactor_MPIBAIJ", 3284 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C", 3286 "MatConvert_MPIBAIJ_MPIBSTRM", 3287 MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3288 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 EXTERN_C_END 3292 3293 /*MC 3294 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3295 3296 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3297 and MATMPIBAIJ otherwise. 3298 3299 Options Database Keys: 3300 . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs   - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3341 3342 Storage Information: 3343 For a square global matrix we define each processor's diagonal portion 3344 to be its local rows and the corresponding columns (a square submatrix); 3345 each processor's off-diagonal portion encompasses the remainder of the 3346 local matrix (a rectangular submatrix). 3347 3348 The user can specify preallocated storage for the diagonal part of 3349 the local submatrix with either d_nz or d_nnz (not both). Set 3350 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3351 memory allocation. Likewise, specify preallocated storage for the 3352 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3353 3354 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3355 the figure below we depict these three local rows and all columns (0-11). 3356 3357 .vb 3358 0 1 2 3 4 5 6 7 8 9 10 11 3359 ------------------- 3360 row 3 | o o o d d d o o o o o o 3361 row 4 | o o o d d d o o o o o o 3362 row 5 | o o o d d d o o o o o o 3363 ------------------- 3364 .ve 3365 3366 Thus, any entries in the d locations are stored in the d (diagonal) 3367 submatrix, and any entries in the o locations are stored in the 3368 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3369 stored simply in the MATSEQBAIJ format for compressed row storage. 3370 3371 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3372 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3373 In general, for PDE problems in which most nonzeros are near the diagonal, 3374 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3375 or you will get TERRIBLE performance; see the users' manual chapter on 3376 matrices. 
3377 3378 You can call MatGetInfo() to get information on how effective the preallocation was; 3379 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3380 You can also run with the option -info and look for messages with the string 3381 malloc in them to see if additional memory allocation was needed. 3382 3383 Level: intermediate 3384 3385 .keywords: matrix, block, aij, compressed row, sparse, parallel 3386 3387 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR() 3388 @*/ 3389 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3390 { 3391 PetscErrorCode ierr; 3392 3393 PetscFunctionBegin; 3394 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 3398 #undef __FUNCT__ 3399 #define __FUNCT__ "MatCreateMPIBAIJ" 3400 /*@C 3401 MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 3402 (block compressed row). For good matrix assembly performance 3403 the user should preallocate the matrix storage by setting the parameters 3404 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3405 performance can be increased by more than a factor of 50. 3406 3407 Collective on MPI_Comm 3408 3409 Input Parameters: 3410 + comm - MPI communicator 3411 . bs - size of blockk 3412 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3413 This value should be the same as the local size used in creating the 3414 y vector for the matrix-vector product y = Ax. 3415 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3416 This value should be the same as the local size used in creating the 3417 x vector for the matrix-vector product y = Ax. 3418 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3452 3453 Storage Information: 3454 For a square global matrix we define each processor's diagonal portion 3455 to be its local rows and the corresponding columns (a square submatrix); 3456 each processor's off-diagonal portion encompasses the remainder of the 3457 local matrix (a rectangular submatrix). 3458 3459 The user can specify preallocated storage for the diagonal part of 3460 the local submatrix with either d_nz or d_nnz (not both). Set 3461 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3462 memory allocation. Likewise, specify preallocated storage for the 3463 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3464 3465 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3466 the figure below we depict these three local rows and all columns (0-11). 3467 3468 .vb 3469 0 1 2 3 4 5 6 7 8 9 10 11 3470 ------------------- 3471 row 3 | o o o d d d o o o o o o 3472 row 4 | o o o d d d o o o o o o 3473 row 5 | o o o d d d o o o o o o 3474 ------------------- 3475 .ve 3476 3477 Thus, any entries in the d locations are stored in the d (diagonal) 3478 submatrix, and any entries in the o locations are stored in the 3479 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3480 stored simply in the MATSEQBAIJ format for compressed row storage. 3481 3482 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3483 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3484 In general, for PDE problems in which most nonzeros are near the diagonal, 3485 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3486 or you will get TERRIBLE performance; see the users' manual chapter on 3487 matrices. 
3488 3489 Level: intermediate 3490 3491 .keywords: matrix, block, aij, compressed row, sparse, parallel 3492 3493 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3494 @*/ 3495 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3496 { 3497 PetscErrorCode ierr; 3498 PetscMPIInt size; 3499 3500 PetscFunctionBegin; 3501 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3502 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3503 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3504 if (size > 1) { 3505 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3506 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3507 } else { 3508 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3509 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3510 } 3511 PetscFunctionReturn(0); 3512 } 3513 3514 #undef __FUNCT__ 3515 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3516 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3517 { 3518 Mat mat; 3519 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3520 PetscErrorCode ierr; 3521 PetscInt len=0; 3522 3523 PetscFunctionBegin; 3524 *newmat = 0; 3525 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3526 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3527 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3528 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3529 3530 mat->factortype = matin->factortype; 3531 mat->preallocated = PETSC_TRUE; 3532 mat->assembled = PETSC_TRUE; 3533 mat->insertmode = NOT_SET_VALUES; 3534 3535 a = (Mat_MPIBAIJ*)mat->data; 3536 mat->rmap->bs = matin->rmap->bs; 3537 a->bs2 = oldmat->bs2; 3538 a->mbs = oldmat->mbs; 3539 
a->nbs = oldmat->nbs; 3540 a->Mbs = oldmat->Mbs; 3541 a->Nbs = oldmat->Nbs; 3542 3543 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3544 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3545 3546 a->size = oldmat->size; 3547 a->rank = oldmat->rank; 3548 a->donotstash = oldmat->donotstash; 3549 a->roworiented = oldmat->roworiented; 3550 a->rowindices = 0; 3551 a->rowvalues = 0; 3552 a->getrowactive = PETSC_FALSE; 3553 a->barray = 0; 3554 a->rstartbs = oldmat->rstartbs; 3555 a->rendbs = oldmat->rendbs; 3556 a->cstartbs = oldmat->cstartbs; 3557 a->cendbs = oldmat->cendbs; 3558 3559 /* hash table stuff */ 3560 a->ht = 0; 3561 a->hd = 0; 3562 a->ht_size = 0; 3563 a->ht_flag = oldmat->ht_flag; 3564 a->ht_fact = oldmat->ht_fact; 3565 a->ht_total_ct = 0; 3566 a->ht_insert_ct = 0; 3567 3568 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3569 if (oldmat->colmap) { 3570 #if defined (PETSC_USE_CTABLE) 3571 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3572 #else 3573 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3574 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3575 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3576 #endif 3577 } else a->colmap = 0; 3578 3579 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3580 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3581 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3582 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3583 } else a->garray = 0; 3584 3585 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3586 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3587 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3588 ierr = 
VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3589 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3590 3591 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3592 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3593 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3594 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3595 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3596 *newmat = mat; 3597 3598 PetscFunctionReturn(0); 3599 } 3600 3601 #undef __FUNCT__ 3602 #define __FUNCT__ "MatLoad_MPIBAIJ" 3603 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3604 { 3605 PetscErrorCode ierr; 3606 int fd; 3607 PetscInt i,nz,j,rstart,rend; 3608 PetscScalar *vals,*buf; 3609 MPI_Comm comm = ((PetscObject)viewer)->comm; 3610 MPI_Status status; 3611 PetscMPIInt rank,size,maxnz; 3612 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3613 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3614 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3615 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3616 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3617 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3618 3619 PetscFunctionBegin; 3620 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3621 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3622 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3623 3624 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3625 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3626 if (!rank) { 3627 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3628 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3629 if (header[0] != MAT_FILE_CLASSID) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3630 } 3631 3632 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3633 3634 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3635 M = header[1]; N = header[2]; 3636 3637 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3638 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3639 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3640 3641 /* If global sizes are set, check if they are consistent with that given in the file */ 3642 if (sizesset) { 3643 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3644 } 3645 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3646 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3647 3648 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3649 3650 /* 3651 This code adds extra rows to make sure the number of rows is 3652 divisible by the blocksize 3653 */ 3654 Mbs = M/bs; 3655 extra_rows = bs - M + bs*Mbs; 3656 if (extra_rows == bs) extra_rows = 0; 3657 else Mbs++; 3658 if (extra_rows && !rank) { 3659 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3660 } 3661 3662 /* determine ownership of all rows */ 3663 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3664 mbs = Mbs/size + ((Mbs % size) > rank); 3665 m = mbs*bs; 3666 } else { /* User set */ 3667 m = newmat->rmap->n; 3668 mbs = m/bs; 3669 } 3670 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3671 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3672 3673 /* process 0 needs enough 
room for process with most rows */ 3674 if (!rank) { 3675 mmax = rowners[1]; 3676 for (i=2; i<size; i++) { 3677 mmax = PetscMax(mmax,rowners[i]); 3678 } 3679 mmax*=bs; 3680 } else mmax = m; 3681 3682 rowners[0] = 0; 3683 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3684 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3685 rstart = rowners[rank]; 3686 rend = rowners[rank+1]; 3687 3688 /* distribute row lengths to all processors */ 3689 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3690 if (!rank) { 3691 mend = m; 3692 if (size == 1) mend = mend - extra_rows; 3693 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3694 for (j=mend; j<m; j++) locrowlens[j] = 1; 3695 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3696 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3697 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3698 for (j=0; j<m; j++) { 3699 procsnz[0] += locrowlens[j]; 3700 } 3701 for (i=1; i<size; i++) { 3702 mend = browners[i+1] - browners[i]; 3703 if (i == size-1) mend = mend - extra_rows; 3704 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3705 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3706 /* calculate the number of nonzeros on each processor */ 3707 for (j=0; j<browners[i+1]-browners[i]; j++) { 3708 procsnz[i] += rowlengths[j]; 3709 } 3710 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3711 } 3712 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3713 } else { 3714 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3715 } 3716 3717 if (!rank) { 3718 /* determine max buffer needed and allocate it */ 3719 maxnz = procsnz[0]; 3720 for (i=1; i<size; i++) { 3721 maxnz = PetscMax(maxnz,procsnz[i]); 3722 } 3723 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3724 3725 /* read in my part of the matrix column indices */ 3726 nz = 
procsnz[0]; 3727 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3728 mycols = ibuf; 3729 if (size == 1) nz -= extra_rows; 3730 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3731 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3732 3733 /* read in every ones (except the last) and ship off */ 3734 for (i=1; i<size-1; i++) { 3735 nz = procsnz[i]; 3736 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3737 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3738 } 3739 /* read in the stuff for the last proc */ 3740 if (size != 1) { 3741 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3742 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3743 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3744 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3745 } 3746 ierr = PetscFree(cols);CHKERRQ(ierr); 3747 } else { 3748 /* determine buffer space needed for message */ 3749 nz = 0; 3750 for (i=0; i<m; i++) { 3751 nz += locrowlens[i]; 3752 } 3753 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3754 mycols = ibuf; 3755 /* receive message of column indices*/ 3756 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3757 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3758 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3759 } 3760 3761 /* loop over local rows, determining number of off diagonal entries */ 3762 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3763 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3764 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3765 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3766 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3767 rowcount = 0; nzcount = 0; 3768 for (i=0; 
i<mbs; i++) { 3769 dcount = 0; 3770 odcount = 0; 3771 for (j=0; j<bs; j++) { 3772 kmax = locrowlens[rowcount]; 3773 for (k=0; k<kmax; k++) { 3774 tmp = mycols[nzcount++]/bs; 3775 if (!mask[tmp]) { 3776 mask[tmp] = 1; 3777 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3778 else masked1[dcount++] = tmp; 3779 } 3780 } 3781 rowcount++; 3782 } 3783 3784 dlens[i] = dcount; 3785 odlens[i] = odcount; 3786 3787 /* zero out the mask elements we set */ 3788 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3789 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3790 } 3791 3792 3793 if (!sizesset) { 3794 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3795 } 3796 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3797 3798 if (!rank) { 3799 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3800 /* read in my part of the matrix numerical values */ 3801 nz = procsnz[0]; 3802 vals = buf; 3803 mycols = ibuf; 3804 if (size == 1) nz -= extra_rows; 3805 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3806 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3807 3808 /* insert into matrix */ 3809 jj = rstart*bs; 3810 for (i=0; i<m; i++) { 3811 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3812 mycols += locrowlens[i]; 3813 vals += locrowlens[i]; 3814 jj++; 3815 } 3816 /* read in other processors (except the last one) and ship out */ 3817 for (i=1; i<size-1; i++) { 3818 nz = procsnz[i]; 3819 vals = buf; 3820 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3821 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3822 } 3823 /* the last proc */ 3824 if (size != 1){ 3825 nz = procsnz[i] - extra_rows; 3826 vals = buf; 3827 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3828 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3829 ierr = 
MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3830 } 3831 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3832 } else { 3833 /* receive numeric values */ 3834 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3835 3836 /* receive message of values*/ 3837 vals = buf; 3838 mycols = ibuf; 3839 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3840 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3841 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3842 3843 /* insert into matrix */ 3844 jj = rstart*bs; 3845 for (i=0; i<m; i++) { 3846 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3847 mycols += locrowlens[i]; 3848 vals += locrowlens[i]; 3849 jj++; 3850 } 3851 } 3852 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3853 ierr = PetscFree(buf);CHKERRQ(ierr); 3854 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3855 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3856 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3857 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3858 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3859 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3860 3861 PetscFunctionReturn(0); 3862 } 3863 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3866 /*@ 3867 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3868 3869 Input Parameters: 3870 . mat - the matrix 3871 . 
fact - factor 3872 3873 Not Collective, each process can use a different factor 3874 3875 Level: advanced 3876 3877 Notes: 3878 This can also be set by the command line option: -mat_use_hash_table <fact> 3879 3880 .keywords: matrix, hashtable, factor, HT 3881 3882 .seealso: MatSetOption() 3883 @*/ 3884 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3885 { 3886 PetscErrorCode ierr; 3887 3888 PetscFunctionBegin; 3889 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 3893 EXTERN_C_BEGIN 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3896 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3897 { 3898 Mat_MPIBAIJ *baij; 3899 3900 PetscFunctionBegin; 3901 baij = (Mat_MPIBAIJ*)mat->data; 3902 baij->ht_fact = fact; 3903 PetscFunctionReturn(0); 3904 } 3905 EXTERN_C_END 3906 3907 #undef __FUNCT__ 3908 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3909 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 3910 { 3911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3912 PetscFunctionBegin; 3913 *Ad = a->A; 3914 *Ao = a->B; 3915 *colmap = a->garray; 3916 PetscFunctionReturn(0); 3917 } 3918 3919 /* 3920 Special version for direct calls from Fortran (to eliminate two function call overheads 3921 */ 3922 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3923 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3924 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3925 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3926 #endif 3927 3928 #undef __FUNCT__ 3929 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3930 /*@C 3931 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3932 3933 Collective on Mat 3934 3935 Input Parameters: 3936 + mat - the matrix 3937 . min - number of input rows 3938 . im - input rows 3939 . nin - number of input columns 3940 . 
in - input columns 3941 . v - numerical values input 3942 - addvin - INSERT_VALUES or ADD_VALUES 3943 3944 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3945 3946 Level: advanced 3947 3948 .seealso: MatSetValuesBlocked() 3949 @*/ 3950 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3951 { 3952 /* convert input arguments to C version */ 3953 Mat mat = *matin; 3954 PetscInt m = *min, n = *nin; 3955 InsertMode addv = *addvin; 3956 3957 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3958 const MatScalar *value; 3959 MatScalar *barray=baij->barray; 3960 PetscBool roworiented = baij->roworiented; 3961 PetscErrorCode ierr; 3962 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3963 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3964 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3965 3966 PetscFunctionBegin; 3967 /* tasks normally handled by MatSetValuesBlocked() */ 3968 if (mat->insertmode == NOT_SET_VALUES) { 3969 mat->insertmode = addv; 3970 } 3971 #if defined(PETSC_USE_DEBUG) 3972 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3973 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3974 #endif 3975 if (mat->assembled) { 3976 mat->was_assembled = PETSC_TRUE; 3977 mat->assembled = PETSC_FALSE; 3978 } 3979 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3980 3981 3982 if(!barray) { 3983 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3984 baij->barray = barray; 3985 } 3986 3987 if (roworiented) { 3988 stepval = (n-1)*bs; 3989 } else { 3990 stepval = (m-1)*bs; 3991 } 3992 for (i=0; i<m; i++) { 3993 if (im[i] < 0) continue; 3994 #if defined(PETSC_USE_DEBUG) 3995 if (im[i] >= baij->Mbs) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3996 #endif 3997 if (im[i] >= rstart && im[i] < rend) { 3998 row = im[i] - rstart; 3999 for (j=0; j<n; j++) { 4000 /* If NumCol = 1 then a copy is not required */ 4001 if ((roworiented) && (n == 1)) { 4002 barray = (MatScalar*)v + i*bs2; 4003 } else if((!roworiented) && (m == 1)) { 4004 barray = (MatScalar*)v + j*bs2; 4005 } else { /* Here a copy is required */ 4006 if (roworiented) { 4007 value = v + i*(stepval+bs)*bs + j*bs; 4008 } else { 4009 value = v + j*(stepval+bs)*bs + i*bs; 4010 } 4011 for (ii=0; ii<bs; ii++,value+=stepval) { 4012 for (jj=0; jj<bs; jj++) { 4013 *barray++ = *value++; 4014 } 4015 } 4016 barray -=bs2; 4017 } 4018 4019 if (in[j] >= cstart && in[j] < cend){ 4020 col = in[j] - cstart; 4021 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4022 } 4023 else if (in[j] < 0) continue; 4024 #if defined(PETSC_USE_DEBUG) 4025 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4026 #endif 4027 else { 4028 if (mat->was_assembled) { 4029 if (!baij->colmap) { 4030 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4031 } 4032 4033 #if defined(PETSC_USE_DEBUG) 4034 #if defined (PETSC_USE_CTABLE) 4035 { PetscInt data; 4036 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4037 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4038 } 4039 #else 4040 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4041 #endif 4042 #endif 4043 #if defined (PETSC_USE_CTABLE) 4044 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4045 col = (col - 1)/bs; 4046 #else 4047 col = (baij->colmap[in[j]] - 1)/bs; 4048 #endif 4049 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 4050 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 4051 col = in[j]; 4052 } 
4053 } 4054 else col = in[j]; 4055 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4056 } 4057 } 4058 } else { 4059 if (!baij->donotstash) { 4060 if (roworiented) { 4061 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4062 } else { 4063 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 4064 } 4065 } 4066 } 4067 } 4068 4069 /* task normally handled by MatSetValuesBlocked() */ 4070 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 4071 PetscFunctionReturn(0); 4072 } 4073 4074 #undef __FUNCT__ 4075 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 4076 /*@ 4077 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 4078 CSR format the local rows. 4079 4080 Collective on MPI_Comm 4081 4082 Input Parameters: 4083 + comm - MPI communicator 4084 . bs - the block size, only a block size of 1 is supported 4085 . m - number of local rows (Cannot be PETSC_DECIDE) 4086 . n - This value should be the same as the local size used in creating the 4087 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4088 calculated if N is given) For square matrices n is almost always m. 4089 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4090 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4091 . i - row indices 4092 . j - column indices 4093 - a - matrix values 4094 4095 Output Parameter: 4096 . mat - the matrix 4097 4098 Level: intermediate 4099 4100 Notes: 4101 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4102 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4103 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4104 4105 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4106 4107 .keywords: matrix, aij, compressed row, sparse, parallel 4108 4109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4110 MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays() 4111 @*/ 4112 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4113 { 4114 PetscErrorCode ierr; 4115 4116 4117 PetscFunctionBegin; 4118 if (i[0]) { 4119 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4120 } 4121 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4122 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4123 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4124 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4125 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4126 PetscFunctionReturn(0); 4127 } 4128