1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 23 EXTERN_C_BEGIN 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 26 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 27 { 28 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 29 PetscErrorCode ierr; 30 31 PetscFunctionBegin; 32 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 33 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 34 PetscFunctionReturn(0); 35 } 36 EXTERN_C_END 37 38 EXTERN_C_BEGIN 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 EXTERN_C_END 52 53 54 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 55 { \ 56 \ 57 brow = row/bs; \ 58 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 59 rmax = aimax[brow]; nrow = ailen[brow]; \ 60 bcol = col/bs; \ 61 ridx = row % bs; cidx = col % bs; \ 62 low = 0; high = nrow; \ 63 while (high-low > 3) { \ 64 t = (low+high)/2; \ 65 if (rp[t] > bcol) high = t; \ 66 else low = t; \ 67 } \ 68 for (_i=low; _i<high; _i++) { \ 69 if (rp[_i] > bcol) break; \ 70 if (rp[_i] == bcol) { \ 71 bap = ap + bs2*_i + bs*cidx + ridx; \ 72 if (addv == ADD_VALUES) *bap += value; \ 73 else *bap = value; \ 74 goto a_noinsert; \ 75 } \ 76 } \ 77 if (a->nonew == 1) goto a_noinsert; \ 78 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 79 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 80 N = nrow++ - 1; \ 81 /* shift up all the later entries in this row */ \ 82 for (ii=N; ii>=_i; ii--) { \ 83 rp[ii+1] = rp[ii]; \ 84 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 
\ 85 } \ 86 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 87 rp[_i] = bcol; \ 88 ap[bs2*_i + bs*cidx + ridx] = value; \ 89 a_noinsert:; \ 90 ailen[brow] = nrow; \ 91 } 92 93 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 94 { \ 95 brow = row/bs; \ 96 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 97 rmax = bimax[brow]; nrow = bilen[brow]; \ 98 bcol = col/bs; \ 99 ridx = row % bs; cidx = col % bs; \ 100 low = 0; high = nrow; \ 101 while (high-low > 3) { \ 102 t = (low+high)/2; \ 103 if (rp[t] > bcol) high = t; \ 104 else low = t; \ 105 } \ 106 for (_i=low; _i<high; _i++) { \ 107 if (rp[_i] > bcol) break; \ 108 if (rp[_i] == bcol) { \ 109 bap = ap + bs2*_i + bs*cidx + ridx; \ 110 if (addv == ADD_VALUES) *bap += value; \ 111 else *bap = value; \ 112 goto b_noinsert; \ 113 } \ 114 } \ 115 if (b->nonew == 1) goto b_noinsert; \ 116 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 117 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 118 N = nrow++ - 1; \ 119 /* shift up all the later entries in this row */ \ 120 for (ii=N; ii>=_i; ii--) { \ 121 rp[ii+1] = rp[ii]; \ 122 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 123 } \ 124 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 125 rp[_i] = bcol; \ 126 ap[bs2*_i + bs*cidx + ridx] = value; \ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa=a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba=b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = PETSC_NULL; 164 MatScalar *v_loc = PETSC_NULL; 165 166 PetscFunctionBegin; 167 if (v) PetscValidScalarPointer(v,6); 168 if (!baij->donotstash) { 169 if (n > baij->n_loc) { 170 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 171 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr); 173 ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr); 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < 
rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else { 192 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 193 } 194 } 195 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 196 col = in[j] - cstart_orig; /* local col index */ 197 brow = row/bs; bcol = col/bs; 198 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 199 if (roworiented) value = v[i*n+j]; 200 else value = v[i+j*m]; 201 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 202 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 203 } else if (in[j] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 206 #endif 207 else { /* off-diag entry (B) */ 208 if (mat->was_assembled) { 209 if (!baij->colmap) { 210 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 211 } 212 #if defined (PETSC_USE_CTABLE) 213 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 214 col = col - 1; 215 #else 216 col = baij->colmap[in[j]/bs] - 1; 217 #endif 218 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 219 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 220 col = in[j]; 221 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 222 B = baij->B; 223 b = (Mat_SeqBAIJ*)(B)->data; 224 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 225 ba=b->a; 226 } else col += in[j]%bs; 227 } else col = in[j]; 228 if (roworiented) value = v[i*n+j]; 229 else value = v[i+j*m]; 230 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 } 234 } else { /* off processor entry */ 235 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 236 if (!baij->donotstash) { 237 mat->assembled = PETSC_FALSE; 238 n_loc = 0; 239 for (j=0; j<n; j++) { 240 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 241 in_loc[n_loc] = in[j]; 242 if (roworiented) { 243 v_loc[n_loc] = v[i*n+j]; 244 } else { 245 v_loc[n_loc] = v[j*m+i]; 246 } 247 n_loc++; 248 } 249 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 250 } 251 } 252 } 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 259 { 260 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 261 const MatScalar *value; 262 MatScalar *barray=baij->barray; 263 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 264 PetscErrorCode ierr; 265 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 266 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 267 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 268 269 PetscFunctionBegin; 270 if (!barray) { 271 ierr = 
PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 272 baij->barray = barray; 273 } 274 275 if (roworiented) { 276 stepval = (n-1)*bs; 277 } else { 278 stepval = (m-1)*bs; 279 } 280 for (i=0; i<m; i++) { 281 if (im[i] < 0) continue; 282 #if defined(PETSC_USE_DEBUG) 283 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 284 #endif 285 if (im[i] >= rstart && im[i] < rend) { 286 row = im[i] - rstart; 287 for (j=0; j<n; j++) { 288 if (im[i] > in[j]) { 289 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 290 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 291 } 292 /* If NumCol = 1 then a copy is not required */ 293 if ((roworiented) && (n == 1)) { 294 barray = (MatScalar*) v + i*bs2; 295 } else if ((!roworiented) && (m == 1)) { 296 barray = (MatScalar*) v + j*bs2; 297 } else { /* Here a copy is required */ 298 if (roworiented) { 299 value = v + i*(stepval+bs)*bs + j*bs; 300 } else { 301 value = v + j*(stepval+bs)*bs + i*bs; 302 } 303 for (ii=0; ii<bs; ii++,value+=stepval) { 304 for (jj=0; jj<bs; jj++) { 305 *barray++ = *value++; 306 } 307 } 308 barray -=bs2; 309 } 310 311 if (in[j] >= cstart && in[j] < cend) { 312 col = in[j] - cstart; 313 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 314 } 315 else if (in[j] < 0) continue; 316 #if defined(PETSC_USE_DEBUG) 317 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 318 #endif 319 else { 320 if (mat->was_assembled) { 321 if (!baij->colmap) { 322 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 323 } 324 325 #if defined(PETSC_USE_DEBUG) 326 #if defined (PETSC_USE_CTABLE) 327 { PetscInt data; 328 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 329 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 } 331 #else 332 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 333 #endif 334 #endif 335 #if defined (PETSC_USE_CTABLE) 336 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 337 col = (col - 1)/bs; 338 #else 339 col = (baij->colmap[in[j]] - 1)/bs; 340 #endif 341 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 342 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 343 col = in[j]; 344 } 345 } 346 else col = in[j]; 347 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 348 } 349 } 350 } else { 351 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 352 if (!baij->donotstash) { 353 if (roworiented) { 354 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 355 } else { 356 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 357 } 358 } 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatGetValues_MPISBAIJ" 366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 367 { 368 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 369 PetscErrorCode ierr; 370 PetscInt bs=mat->rmap->bs,i,j,bsrstart = 
mat->rmap->rstart,bsrend = mat->rmap->rend; 371 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 372 373 PetscFunctionBegin; 374 for (i=0; i<m; i++) { 375 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 376 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 377 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 378 row = idxm[i] - bsrstart; 379 for (j=0; j<n; j++) { 380 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 381 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 382 if (idxn[j] >= bscstart && idxn[j] < bscend) { 383 col = idxn[j] - bscstart; 384 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 385 } else { 386 if (!baij->colmap) { 387 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 388 } 389 #if defined (PETSC_USE_CTABLE) 390 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 391 data --; 392 #else 393 data = baij->colmap[idxn[j]/bs]-1; 394 #endif 395 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 396 else { 397 col = data + idxn[j]%bs; 398 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 399 } 400 } 401 } 402 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 403 } 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatNorm_MPISBAIJ" 409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 410 { 411 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscReal sum[2],*lnorm2; 414 415 PetscFunctionBegin; 416 if (baij->size == 1) { 417 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 418 } else { 419 if (type == NORM_FROBENIUS) { 420 ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr); 421 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 422 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 423 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 424 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 425 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 426 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 427 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 428 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 429 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 430 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 431 PetscReal *rsum,*rsum2,vabs; 432 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 433 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 434 MatScalar *v; 435 436 ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr); 437 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 438 /* Amat */ 439 v = amat->a; jj = amat->j; 440 for (brow=0; brow<mbs; brow++) { 441 grow = bs*(rstart + brow); 442 nz = amat->i[brow+1] - amat->i[brow]; 443 for (bcol=0; bcol<nz; bcol++) { 444 gcol = bs*(rstart + *jj); jj++; 445 for (col=0; col<bs; col++) { 446 for (row=0; row<bs; row++) { 447 vabs = PetscAbsScalar(*v); v++; 448 rsum[gcol+col] += vabs; 449 /* non-diagonal block */ 450 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 451 } 452 } 453 } 454 } 455 /* Bmat 
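     Note: since only the upper triangle is stored, every stored block of Bmat
     below (and every off-diagonal stored block of Amat above) is accumulated
     into both its column sum and its row sum, and the Frobenius branch above
     combines the two local pieces as sqrt(||Amat||^2 + 2*||Bmat||^2).
     A minimal user-level sketch of exercising this routine (the names 'mat'
     and 'nrm' are hypothetical; 'mat' is an assembled MPISBAIJ matrix):

        PetscReal nrm;
        ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
        ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);  (identical to NORM_INFINITY for a symmetric matrix)
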
*/ 456 v = bmat->a; jj = bmat->j; 457 for (brow=0; brow<mbs; brow++) { 458 grow = bs*(rstart + brow); 459 nz = bmat->i[brow+1] - bmat->i[brow]; 460 for (bcol=0; bcol<nz; bcol++) { 461 gcol = bs*garray[*jj]; jj++; 462 for (col=0; col<bs; col++) { 463 for (row=0; row<bs; row++) { 464 vabs = PetscAbsScalar(*v); v++; 465 rsum[gcol+col] += vabs; 466 rsum[grow+row] += vabs; 467 } 468 } 469 } 470 } 471 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 472 *norm = 0.0; 473 for (col=0; col<mat->cmap->N; col++) { 474 if (rsum2[col] > *norm) *norm = rsum2[col]; 475 } 476 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 477 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 485 { 486 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 487 PetscErrorCode ierr; 488 PetscInt nstash,reallocs; 489 InsertMode addv; 490 491 PetscFunctionBegin; 492 if (baij->donotstash || mat->nooffprocentries) { 493 PetscFunctionReturn(0); 494 } 495 496 /* make sure all processors are either in INSERTMODE or ADDMODE */ 497 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 498 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 499 mat->insertmode = addv; /* in case this processor had no cache */ 500 501 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 502 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 503 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 504 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 505 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 506 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 513 { 514 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 515 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 516 PetscErrorCode ierr; 517 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 518 PetscInt *row,*col; 519 PetscBool other_disassembled; 520 PetscMPIInt n; 521 PetscBool r1,r2,r3; 522 MatScalar *val; 523 InsertMode addv = mat->insertmode; 524 525 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 526 PetscFunctionBegin; 527 if (!baij->donotstash && !mat->nooffprocentries) { 528 while (1) { 529 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 530 if (!flg) break; 531 532 for (i=0; i<n;) { 533 /* Now identify the consecutive vals belonging to the same row */ 534 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 535 if (j < n) ncols = j-i; 536 else ncols = n-i; 537 /* Now assemble all these values with a single function call */ 538 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 539 i = j; 540 } 541 } 542 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 543 /* Now process the block-stash. 
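        (For context, the block stash drained here is filled by user-level calls on
        off-process block rows, roughly as in this sketch -- 'mat', 'browg', 'bcolg'
        and 'vals' (a bs*bs array) are hypothetical, and only upper-triangular
        blocks, browg <= bcolg, are kept by the SBAIJ format:

           ierr = MatSetValuesBlocked(mat,1,&browg,1,&bcolg,vals,ADD_VALUES);CHKERRQ(ierr);
           ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
           ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

        MatAssemblyBegin() ships the stashed rows to their owners and the loop
        below re-inserts them locally.)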
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem
%D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 623 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 625 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 627 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 629 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 630 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } else if (format == PETSC_VIEWER_ASCII_INFO) { 633 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 636 PetscFunctionReturn(0); 637 } 638 } 639 640 if (isdraw) { 641 PetscDraw draw; 642 PetscBool isnull; 643 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 644 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 645 } 646 647 if (size == 1) { 648 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 649 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 650 } else { 651 /* assemble the entire matrix onto first processor. */ 652 Mat A; 653 Mat_SeqSBAIJ *Aloc; 654 Mat_SeqBAIJ *Bloc; 655 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 656 MatScalar *a; 657 658 /* Should this be the same type as mat? 
*/ 659 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 660 if (!rank) { 661 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 662 } else { 663 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 664 } 665 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 666 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 667 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 668 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 669 670 /* copy over the A part */ 671 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 672 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 673 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 674 675 for (i=0; i<mbs; i++) { 676 rvals[0] = bs*(baij->rstartbs + i); 677 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 678 for (j=ai[i]; j<ai[i+1]; j++) { 679 col = (baij->cstartbs+aj[j])*bs; 680 for (k=0; k<bs; k++) { 681 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 682 col++; a += bs; 683 } 684 } 685 } 686 /* copy over the B part */ 687 Bloc = (Mat_SeqBAIJ*)baij->B->data; 688 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 689 for (i=0; i<mbs; i++) { 690 691 rvals[0] = bs*(baij->rstartbs + i); 692 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 693 for (j=ai[i]; j<ai[i+1]; j++) { 694 col = baij->garray[aj[j]]*bs; 695 for (k=0; k<bs; k++) { 696 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 697 col++; a += bs; 698 } 699 } 700 } 701 ierr = PetscFree(rvals);CHKERRQ(ierr); 702 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 703 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 /* 705 Everyone has to call to draw the matrix since the graphics waits are 706 synchronized across all processors that share the PetscDraw object 707 */ 708 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 709 if (!rank) { 710 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 711 /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/ 712 PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ); 713 ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 714 } 715 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 716 ierr = MatDestroy(&A);CHKERRQ(ierr); 717 } 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatView_MPISBAIJ" 723 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 724 { 725 PetscErrorCode ierr; 726 PetscBool iascii,isdraw,issocket,isbinary; 727 728 PetscFunctionBegin; 729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 731 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 732 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 733 if (iascii || isdraw || issocket || isbinary) { 734 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatDestroy_MPISBAIJ" 741 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 742 { 743 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 744 PetscErrorCode ierr; 745 746 PetscFunctionBegin; 747 #if defined(PETSC_USE_LOG) 748 
PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
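        Note that for SBAIJ matrices only the stored upper-triangular part of the
        row is returned, and MatGetRow() must be bracketed by
        MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular().  A minimal
        user-level sketch (the names 'mat', 'grow', 'ncols', 'cols' and 'vals'
        are hypothetical):

           ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
           ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
           (use ncols, cols[] and vals[] here)
           ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
           ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);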
995 */ 996 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 997 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 998 PetscInt max = 1,mbs = mat->mbs,tmp; 999 for (i=0; i<mbs; i++) { 1000 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1001 if (max < tmp) { max = tmp; } 1002 } 1003 ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1004 } 1005 1006 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1007 lrow = row - brstart; /* local row index */ 1008 1009 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1010 if (!v) {pvA = 0; pvB = 0;} 1011 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1012 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1013 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1014 nztot = nzA + nzB; 1015 1016 cmap = mat->garray; 1017 if (v || idx) { 1018 if (nztot) { 1019 /* Sort by increasing column numbers, assuming A and B already sorted */ 1020 PetscInt imark = -1; 1021 if (v) { 1022 *v = v_p = mat->rowvalues; 1023 for (i=0; i<nzB; i++) { 1024 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1025 else break; 1026 } 1027 imark = i; 1028 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1029 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1030 } 1031 if (idx) { 1032 *idx = idx_p = mat->rowindices; 1033 if (imark > -1) { 1034 for (i=0; i<imark; i++) { 1035 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1036 } 1037 } else { 1038 for (i=0; i<nzB; i++) { 1039 if (cmap[cworkB[i]/bs] < cstart) 1040 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1041 else break; 1042 } 1043 imark = i; 1044 } 1045 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1046 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1047 } 1048 } else { 1049 if (idx) *idx = 0; 1050 if (v) *v = 0; 1051 } 1052 } 1053 *nz = nztot; 1054 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1055 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1061 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1062 { 1063 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1064 1065 PetscFunctionBegin; 1066 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1067 baij->getrowactive = PETSC_FALSE; 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1073 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1074 { 1075 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1076 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1077 1078 PetscFunctionBegin; 1079 aA->getrow_utriangular = PETSC_TRUE; 1080 PetscFunctionReturn(0); 1081 } 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1084 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1085 { 1086 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1087 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1088 1089 PetscFunctionBegin; 1090 aA->getrow_utriangular = PETSC_FALSE; 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1096 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1097 { 1098 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 
PetscFunctionBegin; 1102 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1103 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1109 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1116 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1122 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1123 { 1124 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1129 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1135 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1136 { 1137 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1138 Mat A = a->A,B = a->B; 1139 PetscErrorCode ierr; 1140 PetscReal isend[5],irecv[5]; 1141 1142 PetscFunctionBegin; 1143 info->block_size = (PetscReal)matin->rmap->bs; 1144 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1145 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1146 isend[3] = info->memory; isend[4] = info->mallocs; 1147 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1148 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1149 isend[3] += info->memory; isend[4] += info->mallocs; 1150 if (flag == MAT_LOCAL) { 1151 info->nz_used = isend[0]; 1152 info->nz_allocated = isend[1]; 1153 info->nz_unneeded = isend[2]; 1154 info->memory = isend[3]; 1155 info->mallocs = isend[4]; 1156 } else if (flag == MAT_GLOBAL_MAX) { 1157 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1158 info->nz_used = irecv[0]; 1159 info->nz_allocated = irecv[1]; 1160 info->nz_unneeded = irecv[2]; 1161 info->memory = irecv[3]; 1162 info->mallocs = irecv[4]; 1163 } else if (flag == MAT_GLOBAL_SUM) { 1164 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1165 info->nz_used = irecv[0]; 1166 info->nz_allocated = irecv[1]; 1167 info->nz_unneeded = irecv[2]; 1168 info->memory = irecv[3]; 1169 info->mallocs = irecv[4]; 1170 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1171 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1172 info->fill_ratio_needed = 0; 1173 info->factor_mallocs = 0; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1179 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1180 { 1181 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1182 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 switch (op) { 1187 case MAT_NEW_NONZERO_LOCATIONS: 1188 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1189 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1190 case MAT_KEEP_NONZERO_PATTERN: 1191 case MAT_NEW_NONZERO_LOCATION_ERR: 1192 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1193 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1194 break; 1195 case MAT_ROW_ORIENTED: 1196 a->roworiented = flg; 1197 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1198 ierr = 
MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__
"MatSetUnfactored_MPISBAIJ" 1301 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1302 { 1303 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1304 PetscErrorCode ierr; 1305 1306 PetscFunctionBegin; 1307 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatEqual_MPISBAIJ" 1315 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1316 { 1317 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1318 Mat a,b,c,d; 1319 PetscBool flg; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 a = matA->A; b = matA->B; 1324 c = matB->A; d = matB->B; 1325 1326 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1327 if (flg) { 1328 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1329 } 1330 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatCopy_MPISBAIJ" 1336 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1337 { 1338 PetscErrorCode ierr; 1339 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1340 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1341 1342 PetscFunctionBegin; 1343 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1344 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1345 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1346 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1347 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1348 } else { 1349 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1350 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1357 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1358 { 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1368 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1369 { 1370 PetscErrorCode ierr; 1371 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data; 1372 PetscBLASInt bnz,one=1; 1373 Mat_SeqSBAIJ *xa,*ya; 1374 Mat_SeqBAIJ *xb,*yb; 1375 1376 PetscFunctionBegin; 1377 if (str == SAME_NONZERO_PATTERN) { 1378 PetscScalar alpha = a; 1379 xa = (Mat_SeqSBAIJ *)xx->A->data; 1380 ya = (Mat_SeqSBAIJ *)yy->A->data; 1381 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1382 BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one); 1383 xb = (Mat_SeqBAIJ *)xx->B->data; 1384 yb = (Mat_SeqBAIJ *)yy->B->data; 1385 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1386 BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one); 1387 } else { 1388 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1389 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1390 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1397 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1398 { 1399 PetscErrorCode ierr; 1400 PetscInt i; 1401 PetscBool flg; 1402 1403 PetscFunctionBegin; 1404 for (i=0; i<n; i++) { 1405 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1406 if (!flg) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices"); 1407 } 1408 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 1413 /* -------------------------------------------------------------------*/ 1414 static struct _MatOps MatOps_Values = { 1415 MatSetValues_MPISBAIJ, 1416 MatGetRow_MPISBAIJ, 1417 MatRestoreRow_MPISBAIJ, 1418 MatMult_MPISBAIJ, 1419 /* 4*/ MatMultAdd_MPISBAIJ, 1420 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1421 MatMultAdd_MPISBAIJ, 1422 0, 1423 0, 1424 0, 1425 /* 10*/ 0, 1426 0, 1427 0, 1428 MatSOR_MPISBAIJ, 1429 MatTranspose_MPISBAIJ, 1430 /* 15*/ MatGetInfo_MPISBAIJ, 1431 MatEqual_MPISBAIJ, 1432 MatGetDiagonal_MPISBAIJ, 1433 MatDiagonalScale_MPISBAIJ, 1434 MatNorm_MPISBAIJ, 1435 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1436 MatAssemblyEnd_MPISBAIJ, 1437 MatSetOption_MPISBAIJ, 1438 MatZeroEntries_MPISBAIJ, 1439 /* 24*/ 0, 1440 0, 1441 0, 1442 0, 1443 0, 1444 /* 29*/ MatSetUp_MPISBAIJ, 1445 0, 1446 0, 1447 0, 1448 0, 1449 /* 34*/ MatDuplicate_MPISBAIJ, 1450 0, 1451 0, 1452 0, 1453 0, 1454 /* 39*/ MatAXPY_MPISBAIJ, 1455 MatGetSubMatrices_MPISBAIJ, 1456 MatIncreaseOverlap_MPISBAIJ, 1457 MatGetValues_MPISBAIJ, 1458 MatCopy_MPISBAIJ, 1459 /* 44*/ 0, 1460 MatScale_MPISBAIJ, 1461 0, 1462 0, 1463 0, 1464 /* 49*/ 0, 1465 0, 1466 0, 1467 0, 1468 0, 1469 /* 54*/ 0, 1470 0, 1471 MatSetUnfactored_MPISBAIJ, 1472 0, 1473 MatSetValuesBlocked_MPISBAIJ, 1474 /* 59*/ 0, 1475 0, 1476 0, 1477 0, 1478 0, 1479 /* 64*/ 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1485 0, 1486 0, 1487 0, 1488 0, 1489 /* 74*/ 0, 1490 0, 1491 0, 1492 0, 1493 0, 1494 /* 79*/ 0, 1495 0, 1496 0, 1497 0, 1498 MatLoad_MPISBAIJ, 1499 /* 84*/ 0, 1500 0, 1501 0, 1502 0, 1503 0, 1504 /* 89*/ 0, 1505 0, 1506 0, 1507 0, 1508 0, 1509 /* 94*/ 0, 1510 0, 1511 0, 1512 0, 1513 0, 1514 /* 99*/ 0, 1515 0, 1516 0, 1517 0, 1518 0, 1519 /*104*/ 0, 1520 MatRealPart_MPISBAIJ, 1521 MatImaginaryPart_MPISBAIJ, 1522 MatGetRowUpperTriangular_MPISBAIJ, 1523 MatRestoreRowUpperTriangular_MPISBAIJ, 1524 /*109*/ 0, 1525 0, 1526 0, 1527 0, 1528 0, 1529 /*114*/ 0, 1530 0, 1531 0, 1532 0, 1533 0, 1534 /*119*/ 0, 1535 0, 1536 0, 1537 0 1538 }; 1539 1540 1541 EXTERN_C_BEGIN 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1544 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1545 { 1546 PetscFunctionBegin; 1547 *a = ((Mat_MPISBAIJ *)A->data)->A; 1548 PetscFunctionReturn(0); 1549 } 1550 EXTERN_C_END 1551 1552 EXTERN_C_BEGIN 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1555 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 1556 { 1557 Mat_MPISBAIJ *b; 1558 PetscErrorCode ierr; 1559 PetscInt i,mbs,Mbs; 1560 1561 PetscFunctionBegin; 1562 if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3; 1563 if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1; 1564 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1565 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1566 1567 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1568 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1569 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1570 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1571 ierr = 
PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1572 1573 if (d_nnz) { 1574 for (i=0; i<B->rmap->n/bs; i++) { 1575 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 1576 } 1577 } 1578 if (o_nnz) { 1579 for (i=0; i<B->rmap->n/bs; i++) { 1580 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 1581 } 1582 } 1583 1584 b = (Mat_MPISBAIJ*)B->data; 1585 mbs = B->rmap->n/bs; 1586 Mbs = B->rmap->N/bs; 1587 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1588 1589 B->rmap->bs = bs; 1590 b->bs2 = bs*bs; 1591 b->mbs = mbs; 1592 b->nbs = mbs; 1593 b->Mbs = Mbs; 1594 b->Nbs = Mbs; 1595 1596 for (i=0; i<=b->size; i++) { 1597 b->rangebs[i] = B->rmap->range[i]/bs; 1598 } 1599 b->rstartbs = B->rmap->rstart/bs; 1600 b->rendbs = B->rmap->rend/bs; 1601 1602 b->cstartbs = B->cmap->rstart/bs; 1603 b->cendbs = B->cmap->rend/bs; 1604 1605 if (!B->preallocated) { 1606 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1607 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1608 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1609 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1610 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1611 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1612 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1613 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1614 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 1615 } 1616 1617 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1618 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1619 B->preallocated = PETSC_TRUE; 1620 PetscFunctionReturn(0); 1621 } 1622 EXTERN_C_END 1623 1624 EXTERN_C_BEGIN 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1627 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1628 { 1629 PetscInt m,rstart,cstart,cend; 1630 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1631 const PetscInt *JJ=0; 1632 PetscScalar *values=0; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1637 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1638 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1639 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1640 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1641 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1642 m = B->rmap->n/bs; 1643 rstart = B->rmap->rstart/bs; 1644 cstart = B->cmap->rstart/bs; 1645 cend = B->cmap->rend/bs; 1646 1647 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1648 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 1649 for (i=0; i<m; i++) { 1650 nz = ii[i+1] - ii[i]; 1651 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1652 nz_max = PetscMax(nz_max,nz); 1653 JJ = jj + ii[i]; 1654 for (j=0; j<nz; j++) { 1655 if (*JJ >= cstart) break; 1656 JJ++; 1657 } 1658 d = 0; 1659 for (; j<nz; j++) { 1660 if (*JJ++ >= cend) 
break; 1661 d++; 1662 } 1663 d_nnz[i] = d; 1664 o_nnz[i] = nz - d; 1665 } 1666 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1667 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1668 1669 values = (PetscScalar*)V; 1670 if (!values) { 1671 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 1672 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1673 } 1674 for (i=0; i<m; i++) { 1675 PetscInt row = i + rstart; 1676 PetscInt ncols = ii[i+1] - ii[i]; 1677 const PetscInt *icols = jj + ii[i]; 1678 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1679 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1680 } 1681 1682 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1683 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1684 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 EXTERN_C_END 1689 1690 EXTERN_C_BEGIN 1691 #if defined(PETSC_HAVE_MUMPS) 1692 extern PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*); 1693 #endif 1694 #if defined(PETSC_HAVE_PASTIX) 1695 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*); 1696 #endif 1697 EXTERN_C_END 1698 1699 /*MC 1700 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1701 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1702 the matrix is stored. 1703 1704 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1705 can call MatSetOption(Mat, MAT_HERMITIAN); 1706 1707 Options Database Keys: 1708 . 
-mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1709 1710 Level: beginner 1711 1712 .seealso: MatCreateMPISBAIJ 1713 M*/ 1714 1715 EXTERN_C_BEGIN 1716 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1717 EXTERN_C_END 1718 1719 EXTERN_C_BEGIN 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatCreate_MPISBAIJ" 1722 PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1723 { 1724 Mat_MPISBAIJ *b; 1725 PetscErrorCode ierr; 1726 PetscBool flg; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr); 1730 B->data = (void*)b; 1731 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1732 1733 B->ops->destroy = MatDestroy_MPISBAIJ; 1734 B->ops->view = MatView_MPISBAIJ; 1735 B->assembled = PETSC_FALSE; 1736 1737 B->insertmode = NOT_SET_VALUES; 1738 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 1739 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 1740 1741 /* build local table of row and column ownerships */ 1742 ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 1743 1744 /* build cache for off array entries formed */ 1745 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 1746 b->donotstash = PETSC_FALSE; 1747 b->colmap = PETSC_NULL; 1748 b->garray = PETSC_NULL; 1749 b->roworiented = PETSC_TRUE; 1750 1751 /* stuff used in block assembly */ 1752 b->barray = 0; 1753 1754 /* stuff used for matrix vector multiply */ 1755 b->lvec = 0; 1756 b->Mvctx = 0; 1757 b->slvec0 = 0; 1758 b->slvec0b = 0; 1759 b->slvec1 = 0; 1760 b->slvec1a = 0; 1761 b->slvec1b = 0; 1762 b->sMvctx = 0; 1763 1764 /* stuff for MatGetRow() */ 1765 b->rowindices = 0; 1766 b->rowvalues = 0; 1767 b->getrowactive = PETSC_FALSE; 1768 1769 /* hash table stuff */ 1770 b->ht = 0; 1771 b->hd = 0; 1772 b->ht_size = 0; 1773 b->ht_flag = PETSC_FALSE; 1774 b->ht_fact = 0; 1775 b->ht_total_ct = 0; 1776 b->ht_insert_ct = 0; 1777 1778 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1779 b->ijonly = PETSC_FALSE; 1780 1781 b->in_loc = 0; 1782 b->v_loc = 0; 1783 b->n_loc = 0; 1784 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1785 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 1786 if (flg) { 1787 PetscReal fact = 1.39; 1788 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1789 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 1790 if (fact <= 1.0) fact = 1.39; 1791 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1792 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1793 } 1794 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1795 1796 #if defined(PETSC_HAVE_PASTIX) 1797 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 1798 "MatGetFactor_mpisbaij_pastix", 1799 MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr); 1800 #endif 1801 #if defined(PETSC_HAVE_MUMPS) 1802 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 1803 "MatGetFactor_sbaij_mumps", 1804 MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 1805 #endif 1806 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1807 "MatStoreValues_MPISBAIJ", 1808 MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1809 
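  /* The PetscObjectComposeFunctionDynamic() calls in this routine register the type-specific
     implementations by name on the object, so that generic entry points such as MatStoreValues()
     and MatMPISBAIJSetPreallocation() can locate them at run time via
     PetscObjectQueryFunction()/PetscTryMethod(). */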
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPISBAIJ",
                                           MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPISBAIJ",
                                           MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                           "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                           MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
                                           "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
                                           MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
                                           "MatConvert_MPISBAIJ_MPISBSTRM",
                                           MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);

  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
   and MATMPISBAIJ otherwise.

   Options Database Keys:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation"
/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs - size of block
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  If you plan to
           factor the matrix you must leave room for the diagonal entry and set a value
           even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
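
   A minimal calling sketch (the communicator comm and the sizes m and M below are
   placeholders, and the counts 3 and 1 are purely illustrative, not recommendations):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);  /* bs=2, 3 d blocks and 1 o block per block row */
.ve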

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  . . . d d d o o o o o o
   row 4  |  . . . d d d o o o o o o
   row 5  |  . . . d d d o o o o o o
           -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateSBAIJ"
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   The number of rows and columns must be divisible by the blocksize.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
            0 1 2 3 4 5 6 7 8 9 10 11
           -------------------
   row 3  |  . . . d d d o o o o o o
   row 4  |  . . . d d d o o o o o o
   row 5  |  . . .
d d d o o o o o o 2029 ------------------- 2030 .ve 2031 2032 Thus, any entries in the d locations are stored in the d (diagonal) 2033 submatrix, and any entries in the o locations are stored in the 2034 o (off-diagonal) submatrix. Note that the d matrix is stored in 2035 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2036 2037 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2038 plus the diagonal part of the d matrix, 2039 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2040 In general, for PDE problems in which most nonzeros are near the diagonal, 2041 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2042 or you will get TERRIBLE performance; see the users' manual chapter on 2043 matrices. 2044 2045 Level: intermediate 2046 2047 .keywords: matrix, block, aij, compressed row, sparse, parallel 2048 2049 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2050 @*/ 2051 2052 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2053 { 2054 PetscErrorCode ierr; 2055 PetscMPIInt size; 2056 2057 PetscFunctionBegin; 2058 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2059 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2061 if (size > 1) { 2062 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2063 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2064 } else { 2065 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2066 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2074 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2075 { 2076 Mat mat; 2077 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2078 PetscErrorCode ierr; 2079 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2080 PetscScalar *array; 2081 2082 PetscFunctionBegin; 2083 *newmat = 0; 2084 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 2085 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2086 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2087 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2088 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2089 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2090 2091 mat->factortype = matin->factortype; 2092 mat->preallocated = PETSC_TRUE; 2093 mat->assembled = PETSC_TRUE; 2094 mat->insertmode = NOT_SET_VALUES; 2095 2096 a = (Mat_MPISBAIJ*)mat->data; 2097 a->bs2 = oldmat->bs2; 2098 a->mbs = oldmat->mbs; 2099 a->nbs = oldmat->nbs; 2100 a->Mbs = oldmat->Mbs; 2101 a->Nbs = oldmat->Nbs; 2102 2103 2104 a->size = oldmat->size; 2105 a->rank = oldmat->rank; 2106 a->donotstash = oldmat->donotstash; 2107 a->roworiented = oldmat->roworiented; 2108 a->rowindices = 0; 2109 a->rowvalues = 0; 2110 a->getrowactive = PETSC_FALSE; 2111 a->barray = 0; 2112 a->rstartbs = oldmat->rstartbs; 2113 a->rendbs = oldmat->rendbs; 2114 a->cstartbs = oldmat->cstartbs; 2115 a->cendbs = oldmat->cendbs; 2116 2117 /* hash table stuff */ 2118 a->ht = 0; 2119 a->hd = 0; 2120 a->ht_size = 0; 2121 a->ht_flag = oldmat->ht_flag; 2122 a->ht_fact = oldmat->ht_fact; 
2123 a->ht_total_ct = 0; 2124 a->ht_insert_ct = 0; 2125 2126 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2127 if (oldmat->colmap) { 2128 #if defined (PETSC_USE_CTABLE) 2129 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2130 #else 2131 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 2132 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2134 #endif 2135 } else a->colmap = 0; 2136 2137 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2138 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 2139 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2140 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2141 } else a->garray = 0; 2142 2143 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2144 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 2146 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2147 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 2148 2149 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2150 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2151 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2152 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2153 2154 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2155 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2156 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2157 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2158 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2159 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2160 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2161 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2162 ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr); 2163 ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr); 2164 ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr); 2165 ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr); 2166 ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr); 2167 2168 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2169 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2170 a->sMvctx = oldmat->sMvctx; 2171 ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr); 2172 2173 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2174 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 2175 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2176 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 2177 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2178 *newmat = mat; 2179 PetscFunctionReturn(0); 2180 } 2181 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatLoad_MPISBAIJ" 2184 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2185 { 2186 PetscErrorCode ierr; 2187 PetscInt i,nz,j,rstart,rend; 2188 PetscScalar *vals,*buf; 2189 MPI_Comm comm = ((PetscObject)viewer)->comm; 2190 MPI_Status status; 2191 PetscMPIInt rank,size,tag = 
((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2192 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2193 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2194 PetscInt bs=1,Mbs,mbs,extra_rows; 2195 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2196 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2197 int fd; 2198 2199 PetscFunctionBegin; 2200 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2201 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2203 2204 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2205 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2206 if (!rank) { 2207 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2208 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2209 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2210 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2211 } 2212 2213 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2214 2215 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2216 M = header[1]; N = header[2]; 2217 2218 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2219 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2220 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2221 2222 /* If global sizes are set, check if they are consistent with that given in the file */ 2223 if (sizesset) { 2224 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2225 } 2226 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2227 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2228 2229 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2230 2231 /* 2232 This code adds extra rows to make sure the number of rows is 2233 divisible by the blocksize 2234 */ 2235 Mbs = M/bs; 2236 extra_rows = bs - M + bs*(Mbs); 2237 if (extra_rows == bs) extra_rows = 0; 2238 else Mbs++; 2239 if (extra_rows &&!rank) { 2240 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2241 } 2242 2243 /* determine ownership of all rows */ 2244 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2245 mbs = Mbs/size + ((Mbs % size) > rank); 2246 m = mbs*bs; 2247 } else { /* User Set */ 2248 m = newmat->rmap->n; 2249 mbs = m/bs; 2250 } 2251 ierr = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr); 2252 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2253 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2254 rowners[0] = 0; 2255 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2256 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2257 rstart = rowners[rank]; 2258 rend = rowners[rank+1]; 2259 2260 /* distribute row lengths to all processors */ 2261 ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 2262 if (!rank) { 2263 ierr = 
PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2264 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2265 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2266 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2267 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2268 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2269 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2270 } else { 2271 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2272 } 2273 2274 if (!rank) { /* procs[0] */ 2275 /* calculate the number of nonzeros on each processor */ 2276 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2277 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2278 for (i=0; i<size; i++) { 2279 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2280 procsnz[i] += rowlengths[j]; 2281 } 2282 } 2283 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2284 2285 /* determine max buffer needed and allocate it */ 2286 maxnz = 0; 2287 for (i=0; i<size; i++) { 2288 maxnz = PetscMax(maxnz,procsnz[i]); 2289 } 2290 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2291 2292 /* read in my part of the matrix column indices */ 2293 nz = procsnz[0]; 2294 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2295 mycols = ibuf; 2296 if (size == 1) nz -= extra_rows; 2297 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2298 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2299 2300 /* read in every ones (except the last) and ship off */ 2301 for (i=1; i<size-1; i++) { 2302 nz = procsnz[i]; 2303 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2304 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2305 } 2306 /* read in the stuff for the last proc */ 2307 if (size != 1) { 2308 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2309 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2310 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2311 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2312 } 2313 ierr = PetscFree(cols);CHKERRQ(ierr); 2314 } else { /* procs[i], i>0 */ 2315 /* determine buffer space needed for message */ 2316 nz = 0; 2317 for (i=0; i<m; i++) { 2318 nz += locrowlens[i]; 2319 } 2320 ierr = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 2321 mycols = ibuf; 2322 /* receive message of column indices*/ 2323 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2324 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2325 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2326 } 2327 2328 /* loop over local rows, determining number of off diagonal entries */ 2329 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 2330 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 2331 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2332 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2333 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2334 rowcount = 0; 2335 nzcount = 0; 2336 for (i=0; i<mbs; i++) { 2337 dcount = 0; 2338 odcount = 0; 2339 for (j=0; j<bs; j++) { 2340 kmax = locrowlens[rowcount]; 2341 for (k=0; k<kmax; k++) { 2342 tmp = mycols[nzcount++]/bs; /* block 
col. index */ 2343 if (!mask[tmp]) { 2344 mask[tmp] = 1; 2345 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2346 else masked1[dcount++] = tmp; /* entry in diag portion */ 2347 } 2348 } 2349 rowcount++; 2350 } 2351 2352 dlens[i] = dcount; /* d_nzz[i] */ 2353 odlens[i] = odcount; /* o_nzz[i] */ 2354 2355 /* zero out the mask elements we set */ 2356 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2357 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2358 } 2359 if (!sizesset) { 2360 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2361 } 2362 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2363 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2364 2365 if (!rank) { 2366 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2367 /* read in my part of the matrix numerical values */ 2368 nz = procsnz[0]; 2369 vals = buf; 2370 mycols = ibuf; 2371 if (size == 1) nz -= extra_rows; 2372 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2373 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2374 2375 /* insert into matrix */ 2376 jj = rstart*bs; 2377 for (i=0; i<m; i++) { 2378 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2379 mycols += locrowlens[i]; 2380 vals += locrowlens[i]; 2381 jj++; 2382 } 2383 2384 /* read in other processors (except the last one) and ship out */ 2385 for (i=1; i<size-1; i++) { 2386 nz = procsnz[i]; 2387 vals = buf; 2388 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2389 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2390 } 2391 /* the last proc */ 2392 if (size != 1) { 2393 nz = procsnz[i] - extra_rows; 2394 vals = buf; 2395 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2396 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2397 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2398 } 2399 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2400 2401 } else { 2402 /* receive numeric values */ 2403 ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 2404 2405 /* receive message of values*/ 2406 vals = buf; 2407 mycols = ibuf; 2408 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2409 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2410 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2411 2412 /* insert into matrix */ 2413 jj = rstart*bs; 2414 for (i=0; i<m; i++) { 2415 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2416 mycols += locrowlens[i]; 2417 vals += locrowlens[i]; 2418 jj++; 2419 } 2420 } 2421 2422 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2423 ierr = PetscFree(buf);CHKERRQ(ierr); 2424 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2425 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2426 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2427 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2428 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2429 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2435 /*XXXXX@ 2436 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
2437 2438 Input Parameters: 2439 . mat - the matrix 2440 . fact - factor 2441 2442 Not Collective on Mat, each process can have a different hash factor 2443 2444 Level: advanced 2445 2446 Notes: 2447 This can also be set by the command line option: -mat_use_hash_table fact 2448 2449 .keywords: matrix, hashtable, factor, HT 2450 2451 .seealso: MatSetOption() 2452 @XXXXX*/ 2453 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2457 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2458 { 2459 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2460 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2461 PetscReal atmp; 2462 PetscReal *work,*svalues,*rvalues; 2463 PetscErrorCode ierr; 2464 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2465 PetscMPIInt rank,size; 2466 PetscInt *rowners_bs,dest,count,source; 2467 PetscScalar *va; 2468 MatScalar *ba; 2469 MPI_Status stat; 2470 2471 PetscFunctionBegin; 2472 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2473 ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr); 2474 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2475 2476 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2477 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2478 2479 bs = A->rmap->bs; 2480 mbs = a->mbs; 2481 Mbs = a->Mbs; 2482 ba = b->a; 2483 bi = b->i; 2484 bj = b->j; 2485 2486 /* find ownerships */ 2487 rowners_bs = A->rmap->range; 2488 2489 /* each proc creates an array to be distributed */ 2490 ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr); 2491 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2492 2493 /* row_max for B */ 2494 if (rank != size-1) { 2495 for (i=0; i<mbs; i++) { 2496 ncols = bi[1] - bi[0]; bi++; 2497 brow = bs*i; 2498 for (j=0; j<ncols; j++) { 2499 bcol = bs*(*bj); 2500 for (kcol=0; kcol<bs; kcol++) { 2501 col = bcol + kcol; /* local col index */ 2502 col += rowners_bs[rank+1]; /* global col index */ 2503 for (krow=0; krow<bs; krow++) { 2504 atmp = PetscAbsScalar(*ba); ba++; 2505 row = brow + krow; /* local row index */ 2506 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2507 if (work[col] < atmp) work[col] = atmp; 2508 } 2509 } 2510 bj++; 2511 } 2512 } 2513 2514 /* send values to its owners */ 2515 for (dest=rank+1; dest<size; dest++) { 2516 svalues = work + rowners_bs[dest]; 2517 count = rowners_bs[dest+1]-rowners_bs[dest]; 2518 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr); 2519 } 2520 } 2521 2522 /* receive values */ 2523 if (rank) { 2524 rvalues = work; 2525 count = rowners_bs[rank+1]-rowners_bs[rank]; 2526 for (source=0; source<rank; source++) { 2527 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr); 2528 /* process values */ 2529 for (i=0; i<count; i++) { 2530 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2531 } 2532 } 2533 } 2534 2535 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2536 ierr = PetscFree(work);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatSOR_MPISBAIJ" 2542 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2543 { 2544 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2545 PetscErrorCode ierr; 2546 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2547 PetscScalar *x,*ptr,*from; 2548 Vec bb1; 2549 const PetscScalar *b; 2550 2551 
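  /*
     Rough summary of the SOR_LOCAL_SYMMETRIC_SWEEP branch below: slvec0 is loaded with
     [-xx ; -B^T*xx] and scattered with ADD_VALUES into slvec1, which was preloaded with
     [bb ; 0]; the subsequent MatMultAdd with B then yields bb1 = bb minus the off-diagonal
     contributions applied to xx, and the sequential symmetric sweep is finally run on the
     diagonal block A with bb1 as the right-hand side.
  */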
PetscFunctionBegin; 2552 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2553 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2554 2555 if (flag == SOR_APPLY_UPPER) { 2556 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2561 if (flag & SOR_ZERO_INITIAL_GUESS) { 2562 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2563 its--; 2564 } 2565 2566 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2567 while (its--) { 2568 2569 /* lower triangular part: slvec0b = - B^T*xx */ 2570 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2571 2572 /* copy xx into slvec0a */ 2573 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2574 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2575 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2576 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2577 2578 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2579 2580 /* copy bb into slvec1a */ 2581 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2582 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2583 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2584 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2585 2586 /* set slvec1b = 0 */ 2587 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2588 2589 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2590 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2591 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2592 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2593 2594 /* upper triangular part: bb1 = bb1 - B*x */ 2595 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2596 2597 /* local diagonal sweep */ 2598 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2599 } 2600 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2601 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2603 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2604 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2605 } else if (flag & SOR_EISENSTAT) { 2606 Vec xx1; 2607 PetscBool hasop; 2608 const PetscScalar *diag; 2609 PetscScalar *sl,scale = (omega - 2.0)/omega; 2610 PetscInt i,n; 2611 2612 if (!mat->xx1) { 2613 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2614 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2615 } 2616 xx1 = mat->xx1; 2617 bb1 = mat->bb1; 2618 2619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2620 2621 if (!mat->diag) { 2622 /* this is wrong for same matrix with new nonzero values */ 2623 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 2624 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2625 } 2626 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2627 2628 if (hasop) { 2629 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2630 ierr = 
VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2631 } else { 2632 /* 2633 These two lines are replaced by code that may be a bit faster for a good compiler 2634 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2635 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2636 */ 2637 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2638 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2639 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2640 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2641 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2642 if (omega == 1.0) { 2643 for (i=0; i<n; i++) { 2644 sl[i] = b[i] - diag[i]*x[i]; 2645 } 2646 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2647 } else { 2648 for (i=0; i<n; i++) { 2649 sl[i] = b[i] + scale*diag[i]*x[i]; 2650 } 2651 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2652 } 2653 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2654 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2655 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2656 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2657 } 2658 2659 /* multiply off-diagonal portion of matrix */ 2660 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2661 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2662 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2664 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2665 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2666 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2667 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2668 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2669 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2670 2671 /* local sweep */ 2672 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2673 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2674 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2680 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2681 { 2682 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2683 PetscErrorCode ierr; 2684 Vec lvec1,bb1; 2685 2686 PetscFunctionBegin; 2687 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2688 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2689 2690 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2691 if (flag & SOR_ZERO_INITIAL_GUESS) { 2692 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2693 its--; 2694 } 2695 2696 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2697 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2698 while (its--) { 2699 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 2701 /* lower diagonal part: bb1 = bb - B^T*xx */ 2702 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2703 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2704 2705 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2707 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2708 2709 /* upper diagonal part: bb1 = bb1 - B*x */ 2710 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2711 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2712 2713 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2714 2715 /* diagonal sweep */ 2716 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2717 } 2718 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2719 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2720 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2726 /*@ 2727 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2728 CSR format the local rows. 2729 2730 Collective on MPI_Comm 2731 2732 Input Parameters: 2733 + comm - MPI communicator 2734 . bs - the block size, only a block size of 1 is supported 2735 . m - number of local rows (Cannot be PETSC_DECIDE) 2736 . n - This value should be the same as the local size used in creating the 2737 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2738 calculated if N is given) For square matrices n is almost always m. 2739 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2740 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2741 . i - row indices 2742 . j - column indices 2743 - a - matrix values 2744 2745 Output Parameter: 2746 . mat - the matrix 2747 2748 Level: intermediate 2749 2750 Notes: 2751 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2752 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2753 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2754 2755 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2756 2757 .keywords: matrix, aij, compressed row, sparse, parallel 2758 2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2760 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2761 @*/ 2762 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2763 { 2764 PetscErrorCode ierr; 2765 2766 2767 PetscFunctionBegin; 2768 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2769 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2770 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2771 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2772 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2773 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2774 PetscFunctionReturn(0); 2775 } 2776 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2780 /*@C 2781 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2782 (the default parallel PETSc format). 
2783 2784 Collective on MPI_Comm 2785 2786 Input Parameters: 2787 + A - the matrix 2788 . bs - the block size 2789 . i - the indices into j for the start of each local row (starts with zero) 2790 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2791 - v - optional values in the matrix 2792 2793 Level: developer 2794 2795 .keywords: matrix, aij, compressed row, sparse, parallel 2796 2797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2798 @*/ 2799 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2800 { 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 2809
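
/*
   The block below is an illustrative usage sketch for MatCreateMPISBAIJWithArrays()
   and is not part of the library; the helper name, the PETSC_COMM_SELF communicator,
   and the 2x2 diagonal sample data are assumptions made purely for the example, which
   is kept under #if 0 so it does not affect compilation.
*/
#if 0
static PetscErrorCode ExampleCreateSBAIJFromCSR(Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       ia[] = {0,1,2};    /* row starts for two local rows */
  PetscInt       ja[] = {0,1};      /* one upper-triangular (diagonal) entry per row */
  PetscScalar    va[] = {2.0,3.0};  /* values for those entries */

  PetscFunctionBegin;
  /* bs = 1 (the only supported block size), 2 local rows and columns, global sizes determined */
  ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif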