#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
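  /* Strategy (descriptive note): build a MATPREALLOCATOR matrix carrying the union of the nonzero
     patterns of the X[] matrices (adding the transposed pattern for those marked symmetric), then
     use it to preallocate B via MatPreallocatorPreallocate() */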
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
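    /* In-place conversion: MatHeaderReplace() swaps the freshly assembled B into the header of A
       and destroys the old internal data, so the caller's Mat handle remains valid */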
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar *barray = baij->barray;
  PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
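        /* A locally owned column lives in the diagonal block A; any other column is translated
           through colmap/garray to a local column of the off-diagonal block B (zero if absent) */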
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* squared norm of A */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* squared norm of B */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal *rsum, *rsum2, vabs;
      PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscMPIInt n;
  PetscBool r1, r2, r3;
  MatScalar *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs;
  PetscMPIInt rank = baij->rank;
  PetscBool iascii, isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar *a;
    const char *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary)
    PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
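  /* only the upper triangle is stored, so the contribution of the missing lower triangle
     is obtained by applying B^H (conjugate transpose of the off-diagonal block) to x */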
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  *flg = PETSC_FALSE;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat C[2];
  IS iscol_local, isrow_local;
  PetscInt csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    isidentity = (PetscBool)(mat->cmap->N == csize_local);
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    if (call == MAT_INITIAL_MATRIX) {
      IS intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
  Mat A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
PetscObjectComm((PetscObject)matin))); 1502 1503 info->nz_used = irecv[0]; 1504 info->nz_allocated = irecv[1]; 1505 info->nz_unneeded = irecv[2]; 1506 info->memory = irecv[3]; 1507 info->mallocs = irecv[4]; 1508 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1509 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1510 info->fill_ratio_needed = 0; 1511 info->factor_mallocs = 0; 1512 PetscFunctionReturn(PETSC_SUCCESS); 1513 } 1514 1515 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1516 { 1517 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1518 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1519 1520 PetscFunctionBegin; 1521 switch (op) { 1522 case MAT_NEW_NONZERO_LOCATIONS: 1523 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1524 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1525 case MAT_KEEP_NONZERO_PATTERN: 1526 case MAT_SUBMAT_SINGLEIS: 1527 case MAT_NEW_NONZERO_LOCATION_ERR: 1528 MatCheckPreallocated(A, 1); 1529 PetscCall(MatSetOption(a->A, op, flg)); 1530 PetscCall(MatSetOption(a->B, op, flg)); 1531 break; 1532 case MAT_ROW_ORIENTED: 1533 MatCheckPreallocated(A, 1); 1534 a->roworiented = flg; 1535 1536 PetscCall(MatSetOption(a->A, op, flg)); 1537 PetscCall(MatSetOption(a->B, op, flg)); 1538 break; 1539 case MAT_FORCE_DIAGONAL_ENTRIES: 1540 case MAT_SORTED_FULL: 1541 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1542 break; 1543 case MAT_IGNORE_OFF_PROC_ENTRIES: 1544 a->donotstash = flg; 1545 break; 1546 case MAT_USE_HASH_TABLE: 1547 a->ht_flag = flg; 1548 break; 1549 case MAT_HERMITIAN: 1550 MatCheckPreallocated(A, 1); 1551 PetscCall(MatSetOption(a->A, op, flg)); 1552 #if defined(PETSC_USE_COMPLEX) 1553 if (flg) { /* need different mat-vec ops */ 1554 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1555 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1556 A->ops->multtranspose = NULL; 1557 A->ops->multtransposeadd = NULL; 1558 A->symmetric = PETSC_BOOL3_FALSE; 1559 } 1560 #endif 1561 break; 1562 case MAT_SPD: 1563 case MAT_SYMMETRIC: 1564 MatCheckPreallocated(A, 1); 1565 PetscCall(MatSetOption(a->A, op, flg)); 1566 #if defined(PETSC_USE_COMPLEX) 1567 if (flg) { /* restore to use default mat-vec ops */ 1568 A->ops->mult = MatMult_MPISBAIJ; 1569 A->ops->multadd = MatMultAdd_MPISBAIJ; 1570 A->ops->multtranspose = MatMult_MPISBAIJ; 1571 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1572 } 1573 #endif 1574 break; 1575 case MAT_STRUCTURALLY_SYMMETRIC: 1576 MatCheckPreallocated(A, 1); 1577 PetscCall(MatSetOption(a->A, op, flg)); 1578 break; 1579 case MAT_SYMMETRY_ETERNAL: 1580 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1581 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1582 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1583 break; 1584 case MAT_SPD_ETERNAL: 1585 break; 1586 case MAT_IGNORE_LOWER_TRIANGULAR: 1587 aA->ignore_ltriangular = flg; 1588 break; 1589 case MAT_ERROR_LOWER_TRIANGULAR: 1590 aA->ignore_ltriangular = flg; 1591 break; 1592 case MAT_GETROW_UPPERTRIANGULAR: 1593 aA->getrow_utriangular = flg; 1594 break; 1595 default: 1596 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1597 } 1598 PetscFunctionReturn(PETSC_SUCCESS); 1599 } 1600 1601 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1602 { 1603 PetscFunctionBegin; 1604 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1605 if (reuse == MAT_INITIAL_MATRIX) { 1606 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1607 
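    /* an SBAIJ matrix is stored symmetrically, so its transpose equals the matrix itself; a value duplicate (or copy, below) is all that is needed */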
} else if (reuse == MAT_REUSE_MATRIX) { 1608 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1609 } 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1614 { 1615 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1616 Mat a = baij->A, b = baij->B; 1617 PetscInt nv, m, n; 1618 PetscBool flg; 1619 1620 PetscFunctionBegin; 1621 if (ll != rr) { 1622 PetscCall(VecEqual(ll, rr, &flg)); 1623 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1624 } 1625 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1626 1627 PetscCall(MatGetLocalSize(mat, &m, &n)); 1628 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1629 1630 PetscCall(VecGetLocalSize(rr, &nv)); 1631 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1632 1633 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1634 1635 /* left diagonalscale the off-diagonal part */ 1636 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1637 1638 /* scale the diagonal part */ 1639 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1640 1641 /* right diagonalscale the off-diagonal part */ 1642 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1643 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1644 PetscFunctionReturn(PETSC_SUCCESS); 1645 } 1646 1647 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1648 { 1649 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1650 1651 PetscFunctionBegin; 1652 PetscCall(MatSetUnfactored(a->A)); 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1657 1658 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1659 { 1660 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1661 Mat a, b, c, d; 1662 PetscBool flg; 1663 1664 PetscFunctionBegin; 1665 a = matA->A; 1666 b = matA->B; 1667 c = matB->A; 1668 d = matB->B; 1669 1670 PetscCall(MatEqual(a, c, &flg)); 1671 if (flg) PetscCall(MatEqual(b, d, &flg)); 1672 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1673 PetscFunctionReturn(PETSC_SUCCESS); 1674 } 1675 1676 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1677 { 1678 PetscBool isbaij; 1679 1680 PetscFunctionBegin; 1681 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1682 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1683 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
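     In that case (or when the caller does not guarantee the same nonzero pattern) fall back to the generic MatCopy_Basic(), which copies the entries row by row.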
*/ 1684 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1685 PetscCall(MatGetRowUpperTriangular(A)); 1686 PetscCall(MatCopy_Basic(A, B, str)); 1687 PetscCall(MatRestoreRowUpperTriangular(A)); 1688 } else { 1689 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1690 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1691 1692 PetscCall(MatCopy(a->A, b->A, str)); 1693 PetscCall(MatCopy(a->B, b->B, str)); 1694 } 1695 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1696 PetscFunctionReturn(PETSC_SUCCESS); 1697 } 1698 1699 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1700 { 1701 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1702 PetscBLASInt bnz, one = 1; 1703 Mat_SeqSBAIJ *xa, *ya; 1704 Mat_SeqBAIJ *xb, *yb; 1705 1706 PetscFunctionBegin; 1707 if (str == SAME_NONZERO_PATTERN) { 1708 PetscScalar alpha = a; 1709 xa = (Mat_SeqSBAIJ *)xx->A->data; 1710 ya = (Mat_SeqSBAIJ *)yy->A->data; 1711 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1712 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1713 xb = (Mat_SeqBAIJ *)xx->B->data; 1714 yb = (Mat_SeqBAIJ *)yy->B->data; 1715 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1716 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1717 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1718 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1719 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1720 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1721 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1722 } else { 1723 Mat B; 1724 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1725 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1726 PetscCall(MatGetRowUpperTriangular(X)); 1727 PetscCall(MatGetRowUpperTriangular(Y)); 1728 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1729 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1730 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1731 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1732 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1733 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1734 PetscCall(MatSetType(B, MATMPISBAIJ)); 1735 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1736 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1737 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1738 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1739 PetscCall(MatHeaderMerge(Y, &B)); 1740 PetscCall(PetscFree(nnz_d)); 1741 PetscCall(PetscFree(nnz_o)); 1742 PetscCall(MatRestoreRowUpperTriangular(X)); 1743 PetscCall(MatRestoreRowUpperTriangular(Y)); 1744 } 1745 PetscFunctionReturn(PETSC_SUCCESS); 1746 } 1747 1748 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1749 { 1750 PetscInt i; 1751 PetscBool flg; 1752 1753 PetscFunctionBegin; 1754 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1755 for (i = 0; i < n; i++) { 1756 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1757 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1758 } 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 1762 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1763 { 1764 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 
1765 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1766 1767 PetscFunctionBegin; 1768 if (!Y->preallocated) { 1769 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1770 } else if (!aij->nz) { 1771 PetscInt nonew = aij->nonew; 1772 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1773 aij->nonew = nonew; 1774 } 1775 PetscCall(MatShift_Basic(Y, a)); 1776 PetscFunctionReturn(PETSC_SUCCESS); 1777 } 1778 1779 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1780 { 1781 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1782 1783 PetscFunctionBegin; 1784 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1785 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1786 if (d) { 1787 PetscInt rstart; 1788 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1789 *d += rstart / A->rmap->bs; 1790 } 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1795 { 1796 PetscFunctionBegin; 1797 *a = ((Mat_MPISBAIJ *)A->data)->A; 1798 PetscFunctionReturn(PETSC_SUCCESS); 1799 } 1800 1801 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1802 { 1803 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1804 1805 PetscFunctionBegin; 1806 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1807 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1812 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1813 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1814 1815 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1816 MatGetRow_MPISBAIJ, 1817 MatRestoreRow_MPISBAIJ, 1818 MatMult_MPISBAIJ, 1819 /* 4*/ MatMultAdd_MPISBAIJ, 1820 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1821 MatMultAdd_MPISBAIJ, 1822 NULL, 1823 NULL, 1824 NULL, 1825 /* 10*/ NULL, 1826 NULL, 1827 NULL, 1828 MatSOR_MPISBAIJ, 1829 MatTranspose_MPISBAIJ, 1830 /* 15*/ MatGetInfo_MPISBAIJ, 1831 MatEqual_MPISBAIJ, 1832 MatGetDiagonal_MPISBAIJ, 1833 MatDiagonalScale_MPISBAIJ, 1834 MatNorm_MPISBAIJ, 1835 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1836 MatAssemblyEnd_MPISBAIJ, 1837 MatSetOption_MPISBAIJ, 1838 MatZeroEntries_MPISBAIJ, 1839 /* 24*/ NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 /* 29*/ MatSetUp_MPI_Hash, 1845 NULL, 1846 NULL, 1847 MatGetDiagonalBlock_MPISBAIJ, 1848 NULL, 1849 /* 34*/ MatDuplicate_MPISBAIJ, 1850 NULL, 1851 NULL, 1852 NULL, 1853 NULL, 1854 /* 39*/ MatAXPY_MPISBAIJ, 1855 MatCreateSubMatrices_MPISBAIJ, 1856 MatIncreaseOverlap_MPISBAIJ, 1857 MatGetValues_MPISBAIJ, 1858 MatCopy_MPISBAIJ, 1859 /* 44*/ NULL, 1860 MatScale_MPISBAIJ, 1861 MatShift_MPISBAIJ, 1862 NULL, 1863 NULL, 1864 /* 49*/ NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 NULL, 1869 /* 54*/ NULL, 1870 NULL, 1871 MatSetUnfactored_MPISBAIJ, 1872 NULL, 1873 MatSetValuesBlocked_MPISBAIJ, 1874 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1875 NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 /* 64*/ NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 NULL, 1884 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1885 NULL, 1886 MatConvert_MPISBAIJ_Basic, 1887 NULL, 1888 NULL, 1889 /* 74*/ NULL, 1890 NULL, 1891 NULL, 1892 NULL, 1893 NULL, 1894 /* 79*/ NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 
MatLoad_MPISBAIJ, 1899 /* 84*/ NULL, 1900 NULL, 1901 NULL, 1902 NULL, 1903 NULL, 1904 /* 89*/ NULL, 1905 NULL, 1906 NULL, 1907 NULL, 1908 NULL, 1909 /* 94*/ NULL, 1910 NULL, 1911 NULL, 1912 NULL, 1913 NULL, 1914 /* 99*/ NULL, 1915 NULL, 1916 NULL, 1917 MatConjugate_MPISBAIJ, 1918 NULL, 1919 /*104*/ NULL, 1920 MatRealPart_MPISBAIJ, 1921 MatImaginaryPart_MPISBAIJ, 1922 MatGetRowUpperTriangular_MPISBAIJ, 1923 MatRestoreRowUpperTriangular_MPISBAIJ, 1924 /*109*/ NULL, 1925 NULL, 1926 NULL, 1927 NULL, 1928 MatMissingDiagonal_MPISBAIJ, 1929 /*114*/ NULL, 1930 NULL, 1931 NULL, 1932 NULL, 1933 NULL, 1934 /*119*/ NULL, 1935 NULL, 1936 NULL, 1937 NULL, 1938 NULL, 1939 /*124*/ NULL, 1940 NULL, 1941 NULL, 1942 NULL, 1943 NULL, 1944 /*129*/ NULL, 1945 NULL, 1946 NULL, 1947 NULL, 1948 NULL, 1949 /*134*/ NULL, 1950 NULL, 1951 NULL, 1952 NULL, 1953 NULL, 1954 /*139*/ MatSetBlockSizes_Default, 1955 NULL, 1956 NULL, 1957 NULL, 1958 NULL, 1959 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1960 NULL, 1961 NULL, 1962 NULL, 1963 NULL, 1964 NULL, 1965 /*150*/ NULL, 1966 MatEliminateZeros_MPISBAIJ, 1967 NULL}; 1968 1969 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1970 { 1971 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1972 PetscInt i, mbs, Mbs; 1973 PetscMPIInt size; 1974 1975 PetscFunctionBegin; 1976 if (B->hash_active) { 1977 B->ops[0] = b->cops; 1978 B->hash_active = PETSC_FALSE; 1979 } 1980 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1981 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1982 PetscCall(PetscLayoutSetUp(B->rmap)); 1983 PetscCall(PetscLayoutSetUp(B->cmap)); 1984 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1985 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1986 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1987 1988 mbs = B->rmap->n / bs; 1989 Mbs = B->rmap->N / bs; 1990 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1991 1992 B->rmap->bs = bs; 1993 b->bs2 = bs * bs; 1994 b->mbs = mbs; 1995 b->Mbs = Mbs; 1996 b->nbs = B->cmap->n / bs; 1997 b->Nbs = B->cmap->N / bs; 1998 1999 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2000 b->rstartbs = B->rmap->rstart / bs; 2001 b->rendbs = B->rmap->rend / bs; 2002 2003 b->cstartbs = B->cmap->rstart / bs; 2004 b->cendbs = B->cmap->rend / bs; 2005 2006 #if defined(PETSC_USE_CTABLE) 2007 PetscCall(PetscHMapIDestroy(&b->colmap)); 2008 #else 2009 PetscCall(PetscFree(b->colmap)); 2010 #endif 2011 PetscCall(PetscFree(b->garray)); 2012 PetscCall(VecDestroy(&b->lvec)); 2013 PetscCall(VecScatterDestroy(&b->Mvctx)); 2014 PetscCall(VecDestroy(&b->slvec0)); 2015 PetscCall(VecDestroy(&b->slvec0b)); 2016 PetscCall(VecDestroy(&b->slvec1)); 2017 PetscCall(VecDestroy(&b->slvec1a)); 2018 PetscCall(VecDestroy(&b->slvec1b)); 2019 PetscCall(VecScatterDestroy(&b->sMvctx)); 2020 2021 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2022 2023 MatSeqXAIJGetOptions_Private(b->B); 2024 PetscCall(MatDestroy(&b->B)); 2025 PetscCall(MatCreate(PETSC_COMM_SELF, 
&b->B)); 2026 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2027 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2028 MatSeqXAIJRestoreOptions_Private(b->B); 2029 2030 MatSeqXAIJGetOptions_Private(b->A); 2031 PetscCall(MatDestroy(&b->A)); 2032 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2033 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2034 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 2035 MatSeqXAIJRestoreOptions_Private(b->A); 2036 2037 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2038 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2039 2040 B->preallocated = PETSC_TRUE; 2041 B->was_assembled = PETSC_FALSE; 2042 B->assembled = PETSC_FALSE; 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2047 { 2048 PetscInt m, rstart, cend; 2049 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2050 const PetscInt *JJ = NULL; 2051 PetscScalar *values = NULL; 2052 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2053 PetscBool nooffprocentries; 2054 2055 PetscFunctionBegin; 2056 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2057 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2058 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2059 PetscCall(PetscLayoutSetUp(B->rmap)); 2060 PetscCall(PetscLayoutSetUp(B->cmap)); 2061 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2062 m = B->rmap->n / bs; 2063 rstart = B->rmap->rstart / bs; 2064 cend = B->cmap->rend / bs; 2065 2066 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2067 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2068 for (i = 0; i < m; i++) { 2069 nz = ii[i + 1] - ii[i]; 2070 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2071 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2072 JJ = jj + ii[i]; 2073 bd = 0; 2074 for (j = 0; j < nz; j++) { 2075 if (*JJ >= i + rstart) break; 2076 JJ++; 2077 bd++; 2078 } 2079 d = 0; 2080 for (; j < nz; j++) { 2081 if (*JJ++ >= cend) break; 2082 d++; 2083 } 2084 d_nnz[i] = d; 2085 o_nnz[i] = nz - d - bd; 2086 nz = nz - bd; 2087 nz_max = PetscMax(nz_max, nz); 2088 } 2089 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2090 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2091 PetscCall(PetscFree2(d_nnz, o_nnz)); 2092 2093 values = (PetscScalar *)V; 2094 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2095 for (i = 0; i < m; i++) { 2096 PetscInt row = i + rstart; 2097 PetscInt ncols = ii[i + 1] - ii[i]; 2098 const PetscInt *icols = jj + ii[i]; 2099 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2100 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2101 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2102 } else { /* block ordering does not match so we can only insert one block at a time. */ 2103 PetscInt j; 2104 for (j = 0; j < ncols; j++) { 2105 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2106 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2107 } 2108 } 2109 } 2110 2111 if (!V) PetscCall(PetscFree(values)); 2112 nooffprocentries = B->nooffprocentries; 2113 B->nooffprocentries = PETSC_TRUE; 2114 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2115 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2116 B->nooffprocentries = nooffprocentries; 2117 2118 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2119 PetscFunctionReturn(PETSC_SUCCESS); 2120 } 2121 2122 /*MC 2123 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2124 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2125 the matrix is stored. 2126 2127 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2128 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2129 2130 Options Database Key: 2131 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2132 2133 Level: beginner 2134 2135 Note: 2136 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2137 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2138 2139 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2140 M*/ 2141 2142 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2143 { 2144 Mat_MPISBAIJ *b; 2145 PetscBool flg = PETSC_FALSE; 2146 2147 PetscFunctionBegin; 2148 PetscCall(PetscNew(&b)); 2149 B->data = (void *)b; 2150 B->ops[0] = MatOps_Values; 2151 2152 B->ops->destroy = MatDestroy_MPISBAIJ; 2153 B->ops->view = MatView_MPISBAIJ; 2154 B->assembled = PETSC_FALSE; 2155 B->insertmode = NOT_SET_VALUES; 2156 2157 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2158 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2159 2160 /* build local table of row and column ownerships */ 2161 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2162 2163 /* build cache for off array entries formed */ 2164 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2165 2166 b->donotstash = PETSC_FALSE; 2167 b->colmap = NULL; 2168 b->garray = NULL; 2169 b->roworiented = PETSC_TRUE; 2170 2171 /* stuff used in block assembly */ 2172 b->barray = NULL; 2173 2174 /* stuff used for matrix vector multiply */ 2175 b->lvec = NULL; 2176 b->Mvctx = NULL; 2177 b->slvec0 = NULL; 2178 b->slvec0b = NULL; 2179 b->slvec1 = NULL; 2180 b->slvec1a = NULL; 2181 b->slvec1b = NULL; 2182 b->sMvctx = NULL; 2183 2184 /* stuff for MatGetRow() */ 2185 b->rowindices = NULL; 2186 b->rowvalues = NULL; 2187 b->getrowactive = PETSC_FALSE; 2188 2189 /* hash table stuff */ 2190 b->ht = NULL; 2191 b->hd = NULL; 2192 b->ht_size = 0; 2193 b->ht_flag = PETSC_FALSE; 2194 b->ht_fact = 0; 2195 b->ht_total_ct = 0; 2196 b->ht_insert_ct = 0; 2197 2198 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2199 b->ijonly = PETSC_FALSE; 2200 2201 b->in_loc = NULL; 2202 b->v_loc = NULL; 2203 b->n_loc = 0; 2204 2205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2207 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2208 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2209 #if defined(PETSC_HAVE_ELEMENTAL) 2210 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2211 #endif 2212 #if defined(PETSC_HAVE_SCALAPACK) 2213 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2214 #endif 2215 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2216 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2217 2218 B->symmetric = PETSC_BOOL3_TRUE; 2219 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2220 B->symmetry_eternal = PETSC_TRUE; 2221 B->structural_symmetry_eternal = PETSC_TRUE; 2222 #if defined(PETSC_USE_COMPLEX) 2223 B->hermitian = PETSC_BOOL3_FALSE; 2224 #else 2225 B->hermitian = PETSC_BOOL3_TRUE; 2226 #endif 2227 2228 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2229 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2230 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2231 if (flg) { 2232 PetscReal fact = 1.39; 2233 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2234 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2235 if (fact <= 1.0) fact = 1.39; 2236 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2237 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2238 } 2239 PetscOptionsEnd(); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } 2242 2243 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2244 /*MC 2245 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2246 2247 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2248 and `MATMPISBAIJ` otherwise. 2249 2250 Options Database Key: 2251 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2252 2253 Level: beginner 2254 2255 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2256 M*/ 2257 2258 /*@C 2259 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2260 the user should preallocate the matrix storage by setting the parameters 2261 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2262 performance can be increased by more than a factor of 50. 2263 2264 Collective 2265 2266 Input Parameters: 2267 + B - the matrix 2268 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2269 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2270 . d_nz - number of block nonzeros per block row in diagonal portion of local 2271 submatrix (same for all local rows) 2272 . 
d_nnz - array containing the number of block nonzeros in the various block rows 2273 in the upper triangular and diagonal part of the in diagonal portion of the local 2274 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2275 for the diagonal entry and set a value even if it is zero. 2276 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2277 submatrix (same for all local rows). 2278 - o_nnz - array containing the number of nonzeros in the various block rows of the 2279 off-diagonal portion of the local submatrix that is right of the diagonal 2280 (possibly different for each block row) or `NULL`. 2281 2282 Options Database Keys: 2283 + -mat_no_unroll - uses code that does not unroll the loops in the 2284 block calculations (much slower) 2285 - -mat_block_size - size of the blocks to use 2286 2287 Level: intermediate 2288 2289 Notes: 2290 2291 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2292 than it must be used on all processors that share the object for that argument. 2293 2294 If the *_nnz parameter is given then the *_nz parameter is ignored 2295 2296 Storage Information: 2297 For a square global matrix we define each processor's diagonal portion 2298 to be its local rows and the corresponding columns (a square submatrix); 2299 each processor's off-diagonal portion encompasses the remainder of the 2300 local matrix (a rectangular submatrix). 2301 2302 The user can specify preallocated storage for the diagonal part of 2303 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2304 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2305 memory allocation. Likewise, specify preallocated storage for the 2306 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2307 2308 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2309 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2310 You can also run with the option `-info` and look for messages with the string 2311 malloc in them to see if additional memory allocation was needed. 2312 2313 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2314 the figure below we depict these three local rows and all columns (0-11). 2315 2316 .vb 2317 0 1 2 3 4 5 6 7 8 9 10 11 2318 -------------------------- 2319 row 3 |. . . d d d o o o o o o 2320 row 4 |. . . d d d o o o o o o 2321 row 5 |. . . d d d o o o o o o 2322 -------------------------- 2323 .ve 2324 2325 Thus, any entries in the d locations are stored in the d (diagonal) 2326 submatrix, and any entries in the o locations are stored in the 2327 o (off-diagonal) submatrix. Note that the d matrix is stored in 2328 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2329 2330 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2331 plus the diagonal part of the d matrix, 2332 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2333 2334 In general, for PDE problems in which most nonzeros are near the diagonal, 2335 one expects `d_nz` >> `o_nz`. 
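   As a small illustration (the communicator, global size, and per-row block counts below are assumptions chosen for the example, not values this routine requires; error checking omitted for brevity), a typical preallocation sequence might look like
.vb
   Mat      A;
   PetscInt bs = 2;

   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, 5, NULL, 2, NULL); /* 5 blocks per block row in d, 2 in o */
.ve
   followed by `MatSetValuesBlocked()` on the diagonal and upper triangular blocks and then `MatAssemblyBegin()`/`MatAssemblyEnd()`.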
2336 2337 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2338 @*/ 2339 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2340 { 2341 PetscFunctionBegin; 2342 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2343 PetscValidType(B, 1); 2344 PetscValidLogicalCollectiveInt(B, bs, 2); 2345 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2346 PetscFunctionReturn(PETSC_SUCCESS); 2347 } 2348 2349 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2350 /*@C 2351 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2352 (block compressed row). For good matrix assembly performance 2353 the user should preallocate the matrix storage by setting the parameters 2354 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2355 2356 Collective 2357 2358 Input Parameters: 2359 + comm - MPI communicator 2360 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2361 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2362 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2363 This value should be the same as the local size used in creating the 2364 y vector for the matrix-vector product y = Ax. 2365 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2366 This value should be the same as the local size used in creating the 2367 x vector for the matrix-vector product y = Ax. 2368 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2369 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2370 . d_nz - number of block nonzeros per block row in diagonal portion of local 2371 submatrix (same for all local rows) 2372 . d_nnz - array containing the number of block nonzeros in the various block rows 2373 in the upper triangular portion of the in diagonal portion of the local 2374 (possibly different for each block block row) or `NULL`. 2375 If you plan to factor the matrix you must leave room for the diagonal entry and 2376 set its value even if it is zero. 2377 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2378 submatrix (same for all local rows). 2379 - o_nnz - array containing the number of nonzeros in the various block rows of the 2380 off-diagonal portion of the local submatrix (possibly different for 2381 each block row) or `NULL`. 2382 2383 Output Parameter: 2384 . A - the matrix 2385 2386 Options Database Keys: 2387 + -mat_no_unroll - uses code that does not unroll the loops in the 2388 block calculations (much slower) 2389 . -mat_block_size - size of the blocks to use 2390 - -mat_mpi - use the parallel matrix data structures even on one processor 2391 (defaults to using SeqBAIJ format on one processor) 2392 2393 Level: intermediate 2394 2395 Notes: 2396 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2397 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
2398 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2399 2400 The number of rows and columns must be divisible by blocksize. 2401 This matrix type does not support complex Hermitian operation. 2402 2403 The user MUST specify either the local or global matrix dimensions 2404 (possibly both). 2405 2406 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2407 than it must be used on all processors that share the object for that argument. 2408 2409 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2410 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2411 2412 If the *_nnz parameter is given then the *_nz parameter is ignored 2413 2414 Storage Information: 2415 For a square global matrix we define each processor's diagonal portion 2416 to be its local rows and the corresponding columns (a square submatrix); 2417 each processor's off-diagonal portion encompasses the remainder of the 2418 local matrix (a rectangular submatrix). 2419 2420 The user can specify preallocated storage for the diagonal part of 2421 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2422 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2423 memory allocation. Likewise, specify preallocated storage for the 2424 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2425 2426 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2427 the figure below we depict these three local rows and all columns (0-11). 2428 2429 .vb 2430 0 1 2 3 4 5 6 7 8 9 10 11 2431 -------------------------- 2432 row 3 |. . . d d d o o o o o o 2433 row 4 |. . . d d d o o o o o o 2434 row 5 |. . . d d d o o o o o o 2435 -------------------------- 2436 .ve 2437 2438 Thus, any entries in the d locations are stored in the d (diagonal) 2439 submatrix, and any entries in the o locations are stored in the 2440 o (off-diagonal) submatrix. Note that the d matrix is stored in 2441 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2442 2443 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2444 plus the diagonal part of the d matrix, 2445 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2446 In general, for PDE problems in which most nonzeros are near the diagonal, 2447 one expects `d_nz` >> `o_nz`. 
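   As an illustration only (the communicator, global size, and nonzero estimates below are assumptions made for the example), a matrix with 2x2 blocks could be created in a single call with
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A);
.ve
   after which values are inserted into the diagonal and upper triangular blocks with `MatSetValuesBlocked()` and the matrix is assembled.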
2448 2449 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2450 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2451 @*/ 2452 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2453 { 2454 PetscMPIInt size; 2455 2456 PetscFunctionBegin; 2457 PetscCall(MatCreate(comm, A)); 2458 PetscCall(MatSetSizes(*A, m, n, M, N)); 2459 PetscCallMPI(MPI_Comm_size(comm, &size)); 2460 if (size > 1) { 2461 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2462 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2463 } else { 2464 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2465 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2466 } 2467 PetscFunctionReturn(PETSC_SUCCESS); 2468 } 2469 2470 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2471 { 2472 Mat mat; 2473 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2474 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2475 PetscScalar *array; 2476 2477 PetscFunctionBegin; 2478 *newmat = NULL; 2479 2480 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2481 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2482 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2483 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2484 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2485 2486 if (matin->hash_active) { 2487 PetscCall(MatSetUp(mat)); 2488 } else { 2489 mat->factortype = matin->factortype; 2490 mat->preallocated = PETSC_TRUE; 2491 mat->assembled = PETSC_TRUE; 2492 mat->insertmode = NOT_SET_VALUES; 2493 2494 a = (Mat_MPISBAIJ *)mat->data; 2495 a->bs2 = oldmat->bs2; 2496 a->mbs = oldmat->mbs; 2497 a->nbs = oldmat->nbs; 2498 a->Mbs = oldmat->Mbs; 2499 a->Nbs = oldmat->Nbs; 2500 2501 a->size = oldmat->size; 2502 a->rank = oldmat->rank; 2503 a->donotstash = oldmat->donotstash; 2504 a->roworiented = oldmat->roworiented; 2505 a->rowindices = NULL; 2506 a->rowvalues = NULL; 2507 a->getrowactive = PETSC_FALSE; 2508 a->barray = NULL; 2509 a->rstartbs = oldmat->rstartbs; 2510 a->rendbs = oldmat->rendbs; 2511 a->cstartbs = oldmat->cstartbs; 2512 a->cendbs = oldmat->cendbs; 2513 2514 /* hash table stuff */ 2515 a->ht = NULL; 2516 a->hd = NULL; 2517 a->ht_size = 0; 2518 a->ht_flag = oldmat->ht_flag; 2519 a->ht_fact = oldmat->ht_fact; 2520 a->ht_total_ct = 0; 2521 a->ht_insert_ct = 0; 2522 2523 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2524 if (oldmat->colmap) { 2525 #if defined(PETSC_USE_CTABLE) 2526 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2527 #else 2528 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2529 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2530 #endif 2531 } else a->colmap = NULL; 2532 2533 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2534 PetscCall(PetscMalloc1(len, &a->garray)); 2535 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2536 } else a->garray = NULL; 2537 2538 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2539 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2540 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2541 2542 
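    /* duplicate the split work vectors; slvec1a/slvec1b (and slvec0b) created below are views of the first bs*mbs entries and of the remaining entries of slvec1 (slvec0), as used by the symmetric matrix-vector product */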
PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2543 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2544 2545 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2546 PetscCall(VecGetArray(a->slvec1, &array)); 2547 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2548 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2549 PetscCall(VecRestoreArray(a->slvec1, &array)); 2550 PetscCall(VecGetArray(a->slvec0, &array)); 2551 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2552 PetscCall(VecRestoreArray(a->slvec0, &array)); 2553 2554 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2555 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2556 a->sMvctx = oldmat->sMvctx; 2557 2558 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2559 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2560 } 2561 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2562 *newmat = mat; 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2567 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2568 2569 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2570 { 2571 PetscBool isbinary; 2572 2573 PetscFunctionBegin; 2574 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2575 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2576 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2581 { 2582 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2583 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2584 PetscReal atmp; 2585 PetscReal *work, *svalues, *rvalues; 2586 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2587 PetscMPIInt rank, size; 2588 PetscInt *rowners_bs, dest, count, source; 2589 PetscScalar *va; 2590 MatScalar *ba; 2591 MPI_Status stat; 2592 2593 PetscFunctionBegin; 2594 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2595 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2596 PetscCall(VecGetArray(v, &va)); 2597 2598 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2599 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2600 2601 bs = A->rmap->bs; 2602 mbs = a->mbs; 2603 Mbs = a->Mbs; 2604 ba = b->a; 2605 bi = b->i; 2606 bj = b->j; 2607 2608 /* find ownerships */ 2609 rowners_bs = A->rmap->range; 2610 2611 /* each proc creates an array to be distributed */ 2612 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2613 2614 /* row_max for B */ 2615 if (rank != size - 1) { 2616 for (i = 0; i < mbs; i++) { 2617 ncols = bi[1] - bi[0]; 2618 bi++; 2619 brow = bs * i; 2620 for (j = 0; j < ncols; j++) { 2621 bcol = bs * (*bj); 2622 for (kcol = 0; kcol < bs; kcol++) { 2623 col = bcol + kcol; /* local col index */ 2624 col += rowners_bs[rank + 1]; /* global col index */ 2625 for (krow = 0; krow < bs; krow++) { 2626 atmp = PetscAbsScalar(*ba); 2627 ba++; 2628 row = brow + krow; /* local row index */ 2629 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2630 if (work[col] < atmp) work[col] = atmp; 2631 
} 2632 } 2633 bj++; 2634 } 2635 } 2636 2637 /* send values to its owners */ 2638 for (dest = rank + 1; dest < size; dest++) { 2639 svalues = work + rowners_bs[dest]; 2640 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2641 PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2642 } 2643 } 2644 2645 /* receive values */ 2646 if (rank) { 2647 rvalues = work; 2648 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2649 for (source = 0; source < rank; source++) { 2650 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2651 /* process values */ 2652 for (i = 0; i < count; i++) { 2653 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2654 } 2655 } 2656 } 2657 2658 PetscCall(VecRestoreArray(v, &va)); 2659 PetscCall(PetscFree(work)); 2660 PetscFunctionReturn(PETSC_SUCCESS); 2661 } 2662 2663 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2664 { 2665 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2666 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2667 PetscScalar *x, *ptr, *from; 2668 Vec bb1; 2669 const PetscScalar *b; 2670 2671 PetscFunctionBegin; 2672 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2673 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2674 2675 if (flag == SOR_APPLY_UPPER) { 2676 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2681 if (flag & SOR_ZERO_INITIAL_GUESS) { 2682 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2683 its--; 2684 } 2685 2686 PetscCall(VecDuplicate(bb, &bb1)); 2687 while (its--) { 2688 /* lower triangular part: slvec0b = - B^T*xx */ 2689 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2690 2691 /* copy xx into slvec0a */ 2692 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2693 PetscCall(VecGetArray(xx, &x)); 2694 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2695 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2696 2697 PetscCall(VecScale(mat->slvec0, -1.0)); 2698 2699 /* copy bb into slvec1a */ 2700 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2701 PetscCall(VecGetArrayRead(bb, &b)); 2702 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2703 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2704 2705 /* set slvec1b = 0 */ 2706 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2707 PetscCall(VecZeroEntries(mat->slvec1b)); 2708 2709 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2710 PetscCall(VecRestoreArray(xx, &x)); 2711 PetscCall(VecRestoreArrayRead(bb, &b)); 2712 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2713 2714 /* upper triangular part: bb1 = bb1 - B*x */ 2715 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2716 2717 /* local diagonal sweep */ 2718 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2719 } 2720 PetscCall(VecDestroy(&bb1)); 2721 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2722 
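    /* a single local forward sweep with a zero initial guess is handled entirely by the local diagonal block */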
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2723 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2724 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2725 } else if (flag & SOR_EISENSTAT) { 2726 Vec xx1; 2727 PetscBool hasop; 2728 const PetscScalar *diag; 2729 PetscScalar *sl, scale = (omega - 2.0) / omega; 2730 PetscInt i, n; 2731 2732 if (!mat->xx1) { 2733 PetscCall(VecDuplicate(bb, &mat->xx1)); 2734 PetscCall(VecDuplicate(bb, &mat->bb1)); 2735 } 2736 xx1 = mat->xx1; 2737 bb1 = mat->bb1; 2738 2739 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2740 2741 if (!mat->diag) { 2742 /* this is wrong for same matrix with new nonzero values */ 2743 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2744 PetscCall(MatGetDiagonal(matin, mat->diag)); 2745 } 2746 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2747 2748 if (hasop) { 2749 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2750 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2751 } else { 2752 /* 2753 These two lines are replaced by code that may be a bit faster for a good compiler 2754 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2755 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2756 */ 2757 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2758 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2759 PetscCall(VecGetArrayRead(bb, &b)); 2760 PetscCall(VecGetArray(xx, &x)); 2761 PetscCall(VecGetLocalSize(xx, &n)); 2762 if (omega == 1.0) { 2763 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2764 PetscCall(PetscLogFlops(2.0 * n)); 2765 } else { 2766 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2767 PetscCall(PetscLogFlops(3.0 * n)); 2768 } 2769 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2770 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2771 PetscCall(VecRestoreArrayRead(bb, &b)); 2772 PetscCall(VecRestoreArray(xx, &x)); 2773 } 2774 2775 /* multiply off-diagonal portion of matrix */ 2776 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2777 PetscCall(VecZeroEntries(mat->slvec1b)); 2778 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2779 PetscCall(VecGetArray(mat->slvec0, &from)); 2780 PetscCall(VecGetArray(xx, &x)); 2781 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2782 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2783 PetscCall(VecRestoreArray(xx, &x)); 2784 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2785 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2786 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2787 2788 /* local sweep */ 2789 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2790 PetscCall(VecAXPY(xx, 1.0, xx1)); 2791 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2792 PetscFunctionReturn(PETSC_SUCCESS); 2793 } 2794 2795 /*@ 2796 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2797 2798 Collective 2799 2800 Input Parameters: 2801 + comm - MPI communicator 2802 . bs - the block size, only a block size of 1 is supported 2803 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 2804 . n - This value should be the same as the local size used in creating the 2805 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2806 calculated if `N` is given) For square matrices `n` is almost always `m`. 2807 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2808 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2809 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2810 . j - column indices 2811 - a - matrix values 2812 2813 Output Parameter: 2814 . mat - the matrix 2815 2816 Level: intermediate 2817 2818 Notes: 2819 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2820 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2821 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2822 2823 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2824 2825 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2826 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2827 @*/ 2828 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2829 { 2830 PetscFunctionBegin; 2831 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2832 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2833 PetscCall(MatCreate(comm, mat)); 2834 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2835 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2836 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2837 PetscFunctionReturn(PETSC_SUCCESS); 2838 } 2839 2840 /*@C 2841 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2842 2843 Collective 2844 2845 Input Parameters: 2846 + B - the matrix 2847 . bs - the block size 2848 . i - the indices into `j` for the start of each local row (indices start with zero) 2849 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2850 - v - optional values in the matrix, pass `NULL` if not provided 2851 2852 Level: advanced 2853 2854 Notes: 2855 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2856 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2857 called this routine. 
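   For illustration only (assuming `bs` = 1 and a single process owning both rows), the symmetric matrix whose upper triangle is
.vb
   ( 4  1 )
   (    3 )
.ve
   would be passed as i = {0,2,3}, j = {0,1,1}, v = {4,1,3}; the entry below the diagonal is simply not listed.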
2858 2859 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2860 and usually the numerical values as well 2861 2862 Any entries passed in that are below the diagonal are ignored 2863 2864 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2865 `MatCreateMPISBAIJWithArrays()` 2866 @*/ 2867 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2868 { 2869 PetscFunctionBegin; 2870 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2871 PetscFunctionReturn(PETSC_SUCCESS); 2872 } 2873 2874 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2875 { 2876 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2877 PetscInt *indx; 2878 PetscScalar *values; 2879 2880 PetscFunctionBegin; 2881 PetscCall(MatGetSize(inmat, &m, &N)); 2882 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2883 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2884 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2885 PetscInt *bindx, rmax = a->rmax, j; 2886 PetscMPIInt rank, size; 2887 2888 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2889 mbs = m / bs; 2890 Nbs = N / cbs; 2891 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2892 nbs = n / cbs; 2893 2894 PetscCall(PetscMalloc1(rmax, &bindx)); 2895 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2896 2897 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2898 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2899 if (rank == size - 1) { 2900 /* Check sum(nbs) = Nbs */ 2901 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2902 } 2903 2904 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2905 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2906 for (i = 0; i < mbs; i++) { 2907 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2908 nnz = nnz / bs; 2909 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2910 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2911 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2912 } 2913 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2914 PetscCall(PetscFree(bindx)); 2915 2916 PetscCall(MatCreate(comm, outmat)); 2917 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2918 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2919 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2920 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2921 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2922 MatPreallocateEnd(dnz, onz); 2923 } 2924 2925 /* numeric phase */ 2926 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2927 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2928 2929 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2930 for (i = 0; i < m; i++) { 2931 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2932 Ii = i + rstart; 2933 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 
2934 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2935 } 2936 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2937 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2938 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2939 PetscFunctionReturn(PETSC_SUCCESS); 2940 } 2941
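/*
   A minimal end-to-end usage sketch for MATMPISBAIJ (illustrative only: the communicator, global size,
   preallocation counts, and inserted values are assumptions made for the example; error checking with
   PetscCall() is omitted for brevity):

     Mat      A;
     PetscInt i, Istart, Iend, N = 100;

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
     MatSetType(A, MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(A, 1, 2, NULL, 1, NULL);
     MatGetOwnershipRange(A, &Istart, &Iend);
     for (i = Istart; i < Iend; i++) {                      // set only the diagonal and upper triangle
       MatSetValue(A, i, i, 2.0, INSERT_VALUES);
       if (i + 1 < N) MatSetValue(A, i, i + 1, -1.0, INSERT_VALUES);
     }
     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
*/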