1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 
PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 
PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = 
ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 A->nonzerostate++; \ 238 a_noinsert:; \ 239 ailen[brow] = nrow; \ 240 } while (0) 241 242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 243 do { \ 244 brow = row / bs; \ 245 rp = bj + bi[brow]; \ 246 ap = ba + bs2 * bi[brow]; \ 247 rmax = bimax[brow]; \ 248 nrow = bilen[brow]; \ 249 bcol = col / bs; \ 250 ridx = row % bs; \ 251 cidx = col % bs; \ 252 low = 0; \ 253 high = nrow; \ 254 while (high - low > 3) { \ 255 t = (low + high) / 2; \ 256 if (rp[t] > bcol) high = t; \ 257 else low = t; \ 258 } \ 259 for (_i = low; _i < high; _i++) { \ 260 if (rp[_i] > bcol) break; \ 261 if (rp[_i] == bcol) { \ 262 bap = ap + bs2 * _i + bs * cidx + ridx; \ 263 if (addv == ADD_VALUES) *bap += value; \ 264 else *bap = value; \ 265 goto b_noinsert; \ 266 } \ 267 } \ 268 if 
(b->nonew == 1) goto b_noinsert; \ 269 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 270 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 271 N = nrow++ - 1; \ 272 /* shift up all the later entries in this row */ \ 273 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 274 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 275 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 276 rp[_i] = bcol; \ 277 ap[bs2 * _i + bs * cidx + ridx] = value; \ 278 B->nonzerostate++; \ 279 b_noinsert:; \ 280 bilen[brow] = nrow; \ 281 } while (0) 282 283 /* Only add/insert a(i,j) with i<=j (blocks). 284 Any a(i,j) with i>j input by user is ignored or generates an error 285 */ 286 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 287 { 288 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 289 MatScalar value; 290 PetscBool roworiented = baij->roworiented; 291 PetscInt i, j, row, col; 292 PetscInt rstart_orig = mat->rmap->rstart; 293 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 294 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 295 296 /* Some Variables required in the macro */ 297 Mat A = baij->A; 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 299 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 300 MatScalar *aa = a->a; 301 302 Mat B = baij->B; 303 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 304 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 305 MatScalar *ba = b->a; 306 307 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 308 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 309 MatScalar *ap, *bap; 310 311 /* for stash */ 312 PetscInt n_loc, *in_loc = NULL; 313 
  MatScalar *v_loc = NULL; /* scratch values buffer for stashing off-process rows */

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* Lazily grow the per-call scratch arrays used to filter out lower-triangular
       entries before stashing an off-process row; they persist on the matrix. */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        /* SBAIJ stores only the upper triangle (in blocks): a strictly
           lower-triangular block entry is either ignored or an error */
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          /* v is row-major when roworiented, column-major otherwise */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue; /* negative columns are silently skipped */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B): translate the global block column through the
             colmap once the matrix has been assembled at least once */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 358 col = col - 1; 359 #else 360 col = baij->colmap[in[j] / bs] - 1; 361 #endif 362 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 363 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 364 col = in[j]; 365 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 366 B = baij->B; 367 b = (Mat_SeqBAIJ *)(B)->data; 368 bimax = b->imax; 369 bi = b->i; 370 bilen = b->ilen; 371 bj = b->j; 372 ba = b->a; 373 } else col += in[j] % bs; 374 } else col = in[j]; 375 if (roworiented) value = v[i * n + j]; 376 else value = v[i + j * m]; 377 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 378 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 379 } 380 } 381 } else { /* off processor entry */ 382 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 383 if (!baij->donotstash) { 384 mat->assembled = PETSC_FALSE; 385 n_loc = 0; 386 for (j = 0; j < n; j++) { 387 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 388 in_loc[n_loc] = in[j]; 389 if (roworiented) { 390 v_loc[n_loc] = v[i * n + j]; 391 } else { 392 v_loc[n_loc] = v[j * m + i]; 393 } 394 n_loc++; 395 } 396 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 404 { 405 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 406 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 407 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 408 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 409 PetscBool roworiented = 
a->roworiented; 410 const PetscScalar *value = v; 411 MatScalar *ap, *aa = a->a, *bap; 412 413 PetscFunctionBegin; 414 if (col < row) { 415 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 416 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 417 } 418 rp = aj + ai[row]; 419 ap = aa + bs2 * ai[row]; 420 rmax = imax[row]; 421 nrow = ailen[row]; 422 value = v; 423 low = 0; 424 high = nrow; 425 426 while (high - low > 7) { 427 t = (low + high) / 2; 428 if (rp[t] > col) high = t; 429 else low = t; 430 } 431 for (i = low; i < high; i++) { 432 if (rp[i] > col) break; 433 if (rp[i] == col) { 434 bap = ap + bs2 * i; 435 if (roworiented) { 436 if (is == ADD_VALUES) { 437 for (ii = 0; ii < bs; ii++) { 438 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 439 } 440 } else { 441 for (ii = 0; ii < bs; ii++) { 442 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 443 } 444 } 445 } else { 446 if (is == ADD_VALUES) { 447 for (ii = 0; ii < bs; ii++) { 448 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 449 } 450 } else { 451 for (ii = 0; ii < bs; ii++) { 452 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 453 } 454 } 455 } 456 goto noinsert2; 457 } 458 } 459 if (nonew == 1) goto noinsert2; 460 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 461 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 462 N = nrow++ - 1; 463 high++; 464 /* shift up all the later entries in this row */ 465 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 466 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 467 rp[i] = col; 468 bap = ap + bs2 * i; 469 
if (roworiented) { 470 for (ii = 0; ii < bs; ii++) { 471 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 472 } 473 } else { 474 for (ii = 0; ii < bs; ii++) { 475 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 476 } 477 } 478 noinsert2:; 479 ailen[row] = nrow; 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 483 /* 484 This routine is exactly duplicated in mpibaij.c 485 */ 486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 487 { 488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 489 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 490 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 491 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 492 PetscBool roworiented = a->roworiented; 493 const PetscScalar *value = v; 494 MatScalar *ap, *aa = a->a, *bap; 495 496 PetscFunctionBegin; 497 rp = aj + ai[row]; 498 ap = aa + bs2 * ai[row]; 499 rmax = imax[row]; 500 nrow = ailen[row]; 501 low = 0; 502 high = nrow; 503 value = v; 504 while (high - low > 7) { 505 t = (low + high) / 2; 506 if (rp[t] > col) high = t; 507 else low = t; 508 } 509 for (i = low; i < high; i++) { 510 if (rp[i] > col) break; 511 if (rp[i] == col) { 512 bap = ap + bs2 * i; 513 if (roworiented) { 514 if (is == ADD_VALUES) { 515 for (ii = 0; ii < bs; ii++) { 516 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 517 } 518 } else { 519 for (ii = 0; ii < bs; ii++) { 520 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 521 } 522 } 523 } else { 524 if (is == ADD_VALUES) { 525 for (ii = 0; ii < bs; ii++, value += bs) { 526 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 527 bap += bs; 528 } 529 } else { 530 for (ii = 0; ii < bs; ii++, value += bs) { 531 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 532 bap += bs; 533 } 534 } 535 } 536 goto noinsert2; 537 } 538 } 539 if (nonew == 1) goto noinsert2; 540 PetscCheck(nonew != -1, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 541 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 542 N = nrow++ - 1; 543 high++; 544 /* shift up all the later entries in this row */ 545 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 546 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 547 rp[i] = col; 548 bap = ap + bs2 * i; 549 if (roworiented) { 550 for (ii = 0; ii < bs; ii++) { 551 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 552 } 553 } else { 554 for (ii = 0; ii < bs; ii++) { 555 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 556 } 557 } 558 noinsert2:; 559 ailen[row] = nrow; 560 PetscFunctionReturn(PETSC_SUCCESS); 561 } 562 563 /* 564 This routine could be optimized by removing the need for the block copy below and passing stride information 565 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 566 */ 567 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 568 { 569 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 570 const MatScalar *value; 571 MatScalar *barray = baij->barray; 572 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 573 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 574 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 575 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 576 577 PetscFunctionBegin; 578 if (!barray) { 579 PetscCall(PetscMalloc1(bs2, &barray)); 580 baij->barray = barray; 581 } 582 583 if (roworiented) { 584 stepval = (n - 1) * bs; 585 } else { 586 stepval = (m - 1) * bs; 587 } 588 for (i = 0; i < m; i++) { 589 if (im[i] < 0) continue; 590 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 591 if (im[i] >= rstart && im[i] < rend) { 592 row = im[i] - rstart; 593 for (j = 0; j < n; j++) { 594 if (im[i] > in[j]) { 595 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 596 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 597 } 598 /* If NumCol = 1 then a copy is not required */ 599 if ((roworiented) && (n == 1)) { 600 barray = (MatScalar *)v + i * bs2; 601 } else if ((!roworiented) && (m == 1)) { 602 barray = (MatScalar *)v + j * bs2; 603 } else { /* Here a copy is required */ 604 if (roworiented) { 605 value = v + i * (stepval + bs) * bs + j * bs; 606 } else { 607 value = v + j * (stepval + bs) * bs + i * bs; 608 } 609 for (ii = 0; ii < bs; ii++, value += stepval) { 610 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 611 } 612 barray -= bs2; 613 } 614 615 if (in[j] >= cstart && in[j] < cend) { 616 col = in[j] - cstart; 617 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 618 } else if (in[j] < 0) { 619 continue; 620 } else { 621 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 622 if (mat->was_assembled) { 623 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 624 625 #if defined(PETSC_USE_DEBUG) 626 #if defined(PETSC_USE_CTABLE) 627 { 628 PetscInt data; 629 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 630 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 631 } 632 #else 633 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 634 #endif 635 #endif 
636 #if defined(PETSC_USE_CTABLE) 637 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 638 col = (col - 1) / bs; 639 #else 640 col = (baij->colmap[in[j]] - 1) / bs; 641 #endif 642 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 643 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 644 col = in[j]; 645 } 646 } else col = in[j]; 647 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 648 } 649 } 650 } else { 651 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 652 if (!baij->donotstash) { 653 if (roworiented) { 654 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 655 } else { 656 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 657 } 658 } 659 } 660 } 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 665 { 666 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 667 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 668 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 669 670 PetscFunctionBegin; 671 for (i = 0; i < m; i++) { 672 if (idxm[i] < 0) continue; /* negative row */ 673 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 674 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 675 row = idxm[i] - bsrstart; 676 for (j = 0; j < n; j++) { 677 if (idxn[j] < 0) continue; /* negative column */ 678 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 679 
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column owned locally: read from the on-diagonal (symmetric) part A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-process column: translate through the colmap into B's local numbering */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* block column not present in B => the value is a structural zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes the norm of a parallel SBAIJ matrix. Since only the upper triangle
   is stored, off-diagonal (B) contributions are counted twice to account for
   the implicit symmetric entries. */
PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the whole matrix lives in A */
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] now holds the square of norm(A); advance to slot for B */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] holds the square of norm(B); restore base pointer */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* B entries appear twice in the full symmetric matrix, hence the factor 2 */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      /* Accumulate per-(global-)column absolute sums; for a symmetric matrix
         the max column sum equals the max row sum, so this serves both norms. */
      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat: each stored upper-triangular block contributes to its column sum,
         and (when not the first block of the row, i.e. not the diagonal block)
         also to its row sum for the implicit transposed entry */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat: off-diagonal blocks always contribute to both column and row sums
         (the transposed entry lives on another process) */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj]; /* garray maps B's compressed block column to global */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Combine partial column sums across all processes, then take the max */
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begins matrix assembly: starts communicating stashed off-process entries
   (both the point stash and the block stash) to their owning processes. */
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so there is nothing to communicate */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 794 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 795 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 796 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 797 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 798 PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 802 { 803 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 804 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 805 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 806 PetscInt *row, *col; 807 PetscBool other_disassembled; 808 PetscMPIInt n; 809 PetscBool r1, r2, r3; 810 MatScalar *val; 811 812 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 813 PetscFunctionBegin; 814 if (!baij->donotstash && !mat->nooffprocentries) { 815 while (1) { 816 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 817 if (!flg) break; 818 819 for (i = 0; i < n;) { 820 /* Now identify the consecutive vals belonging to the same row */ 821 for (j = i, rstart = row[j]; j < n; j++) { 822 if (row[j] != rstart) break; 823 } 824 if (j < n) ncols = j - i; 825 else ncols = n - i; 826 /* Now assemble all these values with a single function call */ 827 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 828 i = j; 829 } 830 } 831 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 832 /* Now process the block-stash. 
       Since the values are stashed column-oriented, set the roworiented flag
       to column oriented, and after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block row is bs2 values long, hence val + i * bs2 */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
/*
   Viewer backend for ASCII, draw and socket viewers: prints per-rank info for
   the INFO formats, otherwise gathers the whole matrix onto rank 0 and views it there.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals[] holds the bs global point row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block column to global block column */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Dispatches viewing to the ASCII/draw/socket path or the binary path */
PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   y = A*x for Hermitian matrices: diagonal block times x, plus the conjugate
   transpose of the off-diagonal block applied via the sMvctx scatter.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1 changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   y = A*x for the symmetric case: only the upper triangle is stored, so the
   lower-triangle contribution comes from B^T applied through the scatter.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1 changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, Hermitian variant (see MatMult_MPISBAIJ_Hermitian) */
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x for the symmetric case */
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales both the diagonal (A) and off-diagonal (B) local blocks by aa */
PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Returns one locally owned (point) row, merging the A (diagonal) and B
   (off-diagonal) parts into column-sorted order in the rowvalues/rowindices
   work arrays allocated on first use.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* index in B where columns jump past the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clears the getrowactive flag set by MatGetRow_MPISBAIJ() */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex conjugates both local blocks; no-op for real scalars */
PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  /* sort copies of both index sets, then count matches with a merging scan */
  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Submatrix extraction: for the symmetric format iscol must equal isrow (and be
   sorted); the actual work is delegated to the BAIJ implementation.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was stashed on the matrix during MAT_INITIAL_MATRIX */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Sums the MatInfo of the A and B blocks, then reduces across ranks for
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies a MatOption to this matrix, forwarding to the A/B blocks as appropriate */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR — confirm intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose of a symmetric matrix is itself: duplicate or copy */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Symmetric diagonal scaling: requires ll == rr (symmetry must be preserved);
   the off-diagonal block is scaled on the left by ll and on the right by the
   scattered remote entries of rr.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

/* Compares A and B blockwise, then reduces the result with a logical AND over the communicator */
PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat           a, b, c, d;
  PetscBool     flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A, B, str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;

    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Y += a*X. SAME_NONZERO_PATTERN uses direct BLAS axpy on the value arrays;
   otherwise a new matrix with merged preallocation is built and header-merged into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates to the BAIJ implementation, then disables ops on non-symmetric submatrices */
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
  for (i = 0; i < n; i++) {
    PetscCall(ISEqual(irow[i], icol[i], &flg));
    if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I; ensures the diagonal block is preallocated before shifting */
PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew; /* preserve the caller's nonew policy */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Checks the diagonal block for a missing diagonal; d is shifted to a global block index */
PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for the MATMPISBAIJ type; indices follow struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPISBAIJ};

PetscErrorCode
MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1928 { 1929 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1930 PetscInt i, mbs, Mbs; 1931 PetscMPIInt size; 1932 1933 PetscFunctionBegin; 1934 if (B->hash_active) { 1935 B->ops[0] = b->cops; 1936 B->hash_active = PETSC_FALSE; 1937 } 1938 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1939 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1940 PetscCall(PetscLayoutSetUp(B->rmap)); 1941 PetscCall(PetscLayoutSetUp(B->cmap)); 1942 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1943 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1944 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1945 1946 mbs = B->rmap->n / bs; 1947 Mbs = B->rmap->N / bs; 1948 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1949 1950 B->rmap->bs = bs; 1951 b->bs2 = bs * bs; 1952 b->mbs = mbs; 1953 b->Mbs = Mbs; 1954 b->nbs = B->cmap->n / bs; 1955 b->Nbs = B->cmap->N / bs; 1956 1957 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1958 b->rstartbs = B->rmap->rstart / bs; 1959 b->rendbs = B->rmap->rend / bs; 1960 1961 b->cstartbs = B->cmap->rstart / bs; 1962 b->cendbs = B->cmap->rend / bs; 1963 1964 #if defined(PETSC_USE_CTABLE) 1965 PetscCall(PetscHMapIDestroy(&b->colmap)); 1966 #else 1967 PetscCall(PetscFree(b->colmap)); 1968 #endif 1969 PetscCall(PetscFree(b->garray)); 1970 PetscCall(VecDestroy(&b->lvec)); 1971 PetscCall(VecScatterDestroy(&b->Mvctx)); 1972 PetscCall(VecDestroy(&b->slvec0)); 
1973 PetscCall(VecDestroy(&b->slvec0b)); 1974 PetscCall(VecDestroy(&b->slvec1)); 1975 PetscCall(VecDestroy(&b->slvec1a)); 1976 PetscCall(VecDestroy(&b->slvec1b)); 1977 PetscCall(VecScatterDestroy(&b->sMvctx)); 1978 1979 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1980 PetscCall(MatDestroy(&b->B)); 1981 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1982 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1983 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1984 1985 PetscCall(MatDestroy(&b->A)); 1986 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1987 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1988 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1989 1990 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1991 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1992 1993 B->preallocated = PETSC_TRUE; 1994 B->was_assembled = PETSC_FALSE; 1995 B->assembled = PETSC_FALSE; 1996 PetscFunctionReturn(PETSC_SUCCESS); 1997 } 1998 1999 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2000 { 2001 PetscInt m, rstart, cend; 2002 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2003 const PetscInt *JJ = NULL; 2004 PetscScalar *values = NULL; 2005 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2006 PetscBool nooffprocentries; 2007 2008 PetscFunctionBegin; 2009 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2010 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2011 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2012 PetscCall(PetscLayoutSetUp(B->rmap)); 2013 PetscCall(PetscLayoutSetUp(B->cmap)); 2014 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2015 m = B->rmap->n / bs; 2016 rstart = B->rmap->rstart / bs; 2017 cend = 
B->cmap->rend / bs; /* one past this process's last diagonal block column */

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* bd: entries strictly below the diagonal (ignored — only the upper triangle is stored) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d: entries in the diagonal block; the columns are assumed sorted, so the first
       column >= cend ends the diagonal portion */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* if no values are supplied, insert zeros from a scratch buffer so the pattern is still set */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries are local by construction, so skip the off-process stash communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);

   Options Database Key:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`

   Level: beginner

   Note:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

/* Constructor for MATMPISBAIJ: installs the operation table, creates the stash used for
   off-process entries, zero-initializes all cached parallel data structures (built later
   by preallocation/assembly), and registers the composed methods and conversions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* an SBAIJ matrix is symmetric by construction; over the complex numbers it is
     symmetric (not Hermitian) unless the user says otherwise */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39; /* default over-allocation factor for the hash table */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 cannot work; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
   and `MATMPISBAIJ` otherwise.

   Options Database Key:
. -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B     - the matrix
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the in diagonal portion of the local
           (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or `NULL`.

   Options Database Keys:
+  -mat_no_unroll  - uses code that does not unroll the loops in the
                     block calculations (much slower)
-  -mat_block_size - size of the blocks to use

   Level: intermediate

   Notes:

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
/* Public entry point; dispatches to the type-specific implementation composed on B
   (a no-op for types that do not provide "MatMPISBAIJSetPreallocation_C") */
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square.
          One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the in diagonal portion of the local
           (possibly different for each block row) or `NULL`.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_no_unroll  - uses code that does not unroll the loops in the
                     block calculations (much slower)
.  -mat_block_size - size of the blocks to use
-  -mat_mpi        - use the parallel matrix data structures even on one processor
                     (defaults to using SeqBAIJ format on one processor)

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
/* Convenience constructor: picks MATSEQSBAIJ on one process, MATMPISBAIJ otherwise, and
   forwards the preallocation arguments (o_nz/o_nnz are meaningless sequentially). */
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an assembled MPISBAIJ matrix: copies the scalar bookkeeping, clones the local
   A/B blocks and the communication structures, and (for sMvctx) shares the original via a
   reference instead of a deep copy. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the scalar/bookkeeping fields of the implementation struct */
  a       = (Mat_MPISBAIJ *)mat->data;
  a->bs2  = oldmat->bs2;
  a->mbs  = oldmat->mbs;
  a->nbs  = oldmat->nbs;
  a->Mbs  = oldmat->Mbs;
  a->Nbs  = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec1a/slvec1b (and slvec0b) alias the storage of slvec1 (slvec0): the first bs*mbs
     entries are the local part, the remainder the ghost part */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load the matrix from a viewer; only binary viewers are supported, and the actual work
   is shared with the MPIBAIJ loader (see the define above). */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* v[i] = max_j |A[i,j]|. Because only the upper triangle is stored, contributions of the
   off-diagonal block to rows owned by OTHER (higher-rank) processes are accumulated in a
   scratch array and sent to their owners. idx output is not implemented. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a =
(Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the diagonal (symmetric) block */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) { /* the last rank owns no off-diagonal (strictly upper) block */
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;            /* local col index */
          col += rowners_bs[rank + 1];  /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            /* each entry contributes to its row (owned here) and, by symmetry, to the
               row equal to its global column (owned by a higher rank, gathered in work) */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) { /* one message from every lower rank; order does not matter (max is commutative) */
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SOR/SSOR relaxation for MPISBAIJ; only bs == 1 is supported. Because only the upper
   triangle of the diagonal block is stored, the lower-triangular contributions are
   reconstructed via B^T through the slvec0/slvec1 scatter machinery. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2689 PetscCall(MatGetDiagonal(matin, mat->diag)); 2690 } 2691 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2692 2693 if (hasop) { 2694 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2695 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2696 } else { 2697 /* 2698 These two lines are replaced by code that may be a bit faster for a good compiler 2699 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2700 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2701 */ 2702 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2703 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2704 PetscCall(VecGetArrayRead(bb, &b)); 2705 PetscCall(VecGetArray(xx, &x)); 2706 PetscCall(VecGetLocalSize(xx, &n)); 2707 if (omega == 1.0) { 2708 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2709 PetscCall(PetscLogFlops(2.0 * n)); 2710 } else { 2711 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2712 PetscCall(PetscLogFlops(3.0 * n)); 2713 } 2714 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2715 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2716 PetscCall(VecRestoreArrayRead(bb, &b)); 2717 PetscCall(VecRestoreArray(xx, &x)); 2718 } 2719 2720 /* multiply off-diagonal portion of matrix */ 2721 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2722 PetscCall(VecZeroEntries(mat->slvec1b)); 2723 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2724 PetscCall(VecGetArray(mat->slvec0, &from)); 2725 PetscCall(VecGetArray(xx, &x)); 2726 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2727 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2728 PetscCall(VecRestoreArray(xx, &x)); 2729 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2730 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2731 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2732 2733 /* local sweep 
*/ 2734 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2735 PetscCall(VecAXPY(xx, 1.0, xx1)); 2736 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2737 PetscFunctionReturn(PETSC_SUCCESS); 2738 } 2739 2740 /*@ 2741 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2742 CSR format the local rows. 2743 2744 Collective 2745 2746 Input Parameters: 2747 + comm - MPI communicator 2748 . bs - the block size, only a block size of 1 is supported 2749 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2750 . n - This value should be the same as the local size used in creating the 2751 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2752 calculated if `N` is given) For square matrices `n` is almost always `m`. 2753 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2754 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2755 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2756 . j - column indices 2757 - a - matrix values 2758 2759 Output Parameter: 2760 . mat - the matrix 2761 2762 Level: intermediate 2763 2764 Notes: 2765 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2766 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2767 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2768 2769 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 
2770 2771 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2772 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2773 @*/ 2774 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2775 { 2776 PetscFunctionBegin; 2777 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2778 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2779 PetscCall(MatCreate(comm, mat)); 2780 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2781 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2782 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2783 PetscFunctionReturn(PETSC_SUCCESS); 2784 } 2785 2786 /*@C 2787 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2788 2789 Collective 2790 2791 Input Parameters: 2792 + B - the matrix 2793 . bs - the block size 2794 . i - the indices into `j` for the start of each local row (starts with zero) 2795 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2796 - v - optional values in the matrix 2797 2798 Level: advanced 2799 2800 Notes: 2801 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2802 and usually the numerical values as well 2803 2804 Any entries below the diagonal are ignored 2805 2806 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2807 @*/ 2808 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2809 { 2810 PetscFunctionBegin; 2811 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2812 PetscFunctionReturn(PETSC_SUCCESS); 2813 } 2814 2815 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2816 { 2817 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2818 PetscInt *indx; 2819 PetscScalar *values; 2820 2821 PetscFunctionBegin; 2822 PetscCall(MatGetSize(inmat, &m, &N)); 2823 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2824 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2825 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2826 PetscInt *bindx, rmax = a->rmax, j; 2827 PetscMPIInt rank, size; 2828 2829 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2830 mbs = m / bs; 2831 Nbs = N / cbs; 2832 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2833 nbs = n / cbs; 2834 2835 PetscCall(PetscMalloc1(rmax, &bindx)); 2836 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2837 2838 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2839 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2840 if (rank == size - 1) { 2841 /* Check sum(nbs) = 
Nbs */ 2842 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2843 } 2844 2845 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2846 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2847 for (i = 0; i < mbs; i++) { 2848 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2849 nnz = nnz / bs; 2850 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2851 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2852 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2853 } 2854 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2855 PetscCall(PetscFree(bindx)); 2856 2857 PetscCall(MatCreate(comm, outmat)); 2858 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2859 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2860 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2861 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2862 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2863 MatPreallocateEnd(dnz, onz); 2864 } 2865 2866 /* numeric phase */ 2867 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2868 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2869 2870 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2871 for (i = 0; i < m; i++) { 2872 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2873 Ii = i + rstart; 2874 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2875 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2876 } 2877 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2878 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2879 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 
2882