1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", 
NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 
PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 
PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* Copy the stored upper triangle into B; the transposed (or conjugate-transposed)
     entries are inserted as well so B holds the full symmetric/Hermitian matrix */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt ncols;
    const PetscInt *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      /* Hermitian: mirror entry (r,c) to (c,r) with the conjugated value */
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatStoreValues_MPISBAIJ - save a copy of the current numerical values of the
   diagonal part (A) and off-diagonal part (B) so they can later be restored
   with MatRetrieveValues(); the nonzero pattern is untouched.
*/
static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatRetrieveValues_MPISBAIJ - restore the numerical values previously saved
   with MatStoreValues() into both the diagonal (A) and off-diagonal (B) parts.
*/
static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert or add one scalar into the local diagonal (SBAIJ) part A at local
   point indices (row,col); binary-search the block row for block column bcol,
   growing the row storage if needed (honoring a->nonew). Expects the macro
   locals (rp, ap, rmax, nrow, ... ) declared by the caller. */
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp = aj + ai[brow]; \
    ap = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 a_noinsert:; \ 238 ailen[brow] = nrow; \ 239 } while (0) 240 241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 242 do { \ 243 brow = row / bs; \ 244 rp = bj + bi[brow]; \ 245 ap = ba + bs2 * bi[brow]; \ 246 rmax = bimax[brow]; \ 247 nrow = bilen[brow]; \ 248 bcol = col / bs; \ 249 ridx = row % bs; \ 250 cidx = col % bs; \ 251 low = 0; \ 252 high = nrow; \ 253 while (high - low > 3) { \ 254 t = (low + high) / 2; \ 255 if (rp[t] > bcol) high = t; \ 256 else low = t; \ 257 } \ 258 for (_i = low; _i < high; _i++) { \ 259 if (rp[_i] > bcol) break; \ 260 if (rp[_i] == bcol) { \ 261 bap = ap + bs2 * _i + bs * cidx + ridx; \ 262 if (addv == ADD_VALUES) *bap += value; \ 263 else *bap = value; \ 264 goto b_noinsert; \ 265 } \ 266 } \ 267 if (b->nonew == 1) 
goto b_noinsert; \ 268 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 269 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 270 N = nrow++ - 1; \ 271 /* shift up all the later entries in this row */ \ 272 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 273 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 274 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 275 rp[_i] = bcol; \ 276 ap[bs2 * _i + bs * cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } while (0) 280 281 /* Only add/insert a(i,j) with i<=j (blocks). 282 Any a(i,j) with i>j input by user is ignored or generates an error 283 */ 284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 285 { 286 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 287 MatScalar value; 288 PetscBool roworiented = baij->roworiented; 289 PetscInt i, j, row, col; 290 PetscInt rstart_orig = mat->rmap->rstart; 291 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 292 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 293 294 /* Some Variables required in the macro */ 295 Mat A = baij->A; 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 297 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 298 MatScalar *aa = a->a; 299 300 Mat B = baij->B; 301 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 302 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 303 MatScalar *ba = b->a; 304 305 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 306 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 307 MatScalar *ap, *bap; 308 309 /* for stash */ 310 PetscInt n_loc, *in_loc = NULL; 311 MatScalar *v_loc = NULL; 312 313 
PetscFunctionBegin;
  if (!baij->donotstash) {
    /* Grow the scratch arrays used to stage off-process rows before stashing;
       they are reused across calls and only reallocated when n exceeds the
       current capacity baij->n_loc */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig; /* local row index */
      for (j = 0; j < n; j++) {
        /* SBAIJ stores only the upper block triangle: a strictly lower block
           is either silently skipped or an error, per ignore_ltriangular */
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col = in[j] - cstart_orig; /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B): translate the global column through colmap
             into B's compact local column numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap,
in[j] / bs + 1, 0, &col)); 356 col = col - 1; 357 #else 358 col = baij->colmap[in[j] / bs] - 1; 359 #endif 360 if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) { 361 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 362 col = in[j]; 363 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 364 B = baij->B; 365 b = (Mat_SeqBAIJ *)B->data; 366 bimax = b->imax; 367 bi = b->i; 368 bilen = b->ilen; 369 bj = b->j; 370 ba = b->a; 371 } else col += in[j] % bs; 372 } else col = in[j]; 373 if (roworiented) value = v[i * n + j]; 374 else value = v[i + j * m]; 375 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 376 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 377 } 378 } 379 } else { /* off processor entry */ 380 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 381 if (!baij->donotstash) { 382 mat->assembled = PETSC_FALSE; 383 n_loc = 0; 384 for (j = 0; j < n; j++) { 385 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 386 in_loc[n_loc] = in[j]; 387 if (roworiented) { 388 v_loc[n_loc] = v[i * n + j]; 389 } else { 390 v_loc[n_loc] = v[j * m + i]; 391 } 392 n_loc++; 393 } 394 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 395 } 396 } 397 } 398 PetscFunctionReturn(PETSC_SUCCESS); 399 } 400 401 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 402 { 403 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 404 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 405 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 406 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 407 PetscBool roworiented = a->roworiented; 408 const PetscScalar *value = v; 409 MatScalar 
*ap, *aa = a->a, *bap; 410 411 PetscFunctionBegin; 412 if (col < row) { 413 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 414 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 415 } 416 rp = aj + ai[row]; 417 ap = aa + bs2 * ai[row]; 418 rmax = imax[row]; 419 nrow = ailen[row]; 420 value = v; 421 low = 0; 422 high = nrow; 423 424 while (high - low > 7) { 425 t = (low + high) / 2; 426 if (rp[t] > col) high = t; 427 else low = t; 428 } 429 for (i = low; i < high; i++) { 430 if (rp[i] > col) break; 431 if (rp[i] == col) { 432 bap = ap + bs2 * i; 433 if (roworiented) { 434 if (is == ADD_VALUES) { 435 for (ii = 0; ii < bs; ii++) { 436 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 437 } 438 } else { 439 for (ii = 0; ii < bs; ii++) { 440 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 441 } 442 } 443 } else { 444 if (is == ADD_VALUES) { 445 for (ii = 0; ii < bs; ii++) { 446 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 447 } 448 } else { 449 for (ii = 0; ii < bs; ii++) { 450 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 451 } 452 } 453 } 454 goto noinsert2; 455 } 456 } 457 if (nonew == 1) goto noinsert2; 458 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 459 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 460 N = nrow++ - 1; 461 high++; 462 /* shift up all the later entries in this row */ 463 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 464 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 465 rp[i] = col; 466 bap = ap + bs2 * i; 467 if (roworiented) { 468 for (ii = 0; ii < bs; ii++) { 469 for 
(jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 470 } 471 } else { 472 for (ii = 0; ii < bs; ii++) { 473 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 474 } 475 } 476 noinsert2:; 477 ailen[row] = nrow; 478 PetscFunctionReturn(PETSC_SUCCESS); 479 } 480 481 /* 482 This routine is exactly duplicated in mpibaij.c 483 */ 484 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 487 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 488 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 489 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 490 PetscBool roworiented = a->roworiented; 491 const PetscScalar *value = v; 492 MatScalar *ap, *aa = a->a, *bap; 493 494 PetscFunctionBegin; 495 rp = aj + ai[row]; 496 ap = aa + bs2 * ai[row]; 497 rmax = imax[row]; 498 nrow = ailen[row]; 499 low = 0; 500 high = nrow; 501 value = v; 502 while (high - low > 7) { 503 t = (low + high) / 2; 504 if (rp[t] > col) high = t; 505 else low = t; 506 } 507 for (i = low; i < high; i++) { 508 if (rp[i] > col) break; 509 if (rp[i] == col) { 510 bap = ap + bs2 * i; 511 if (roworiented) { 512 if (is == ADD_VALUES) { 513 for (ii = 0; ii < bs; ii++) { 514 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 515 } 516 } else { 517 for (ii = 0; ii < bs; ii++) { 518 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 519 } 520 } 521 } else { 522 if (is == ADD_VALUES) { 523 for (ii = 0; ii < bs; ii++, value += bs) { 524 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 525 bap += bs; 526 } 527 } else { 528 for (ii = 0; ii < bs; ii++, value += bs) { 529 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 530 bap += bs; 531 } 532 } 533 } 534 goto noinsert2; 535 } 536 } 537 if (nonew == 1) goto noinsert2; 538 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed 
nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 539 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 540 N = nrow++ - 1; 541 high++; 542 /* shift up all the later entries in this row */ 543 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 544 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 545 rp[i] = col; 546 bap = ap + bs2 * i; 547 if (roworiented) { 548 for (ii = 0; ii < bs; ii++) { 549 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 550 } 551 } else { 552 for (ii = 0; ii < bs; ii++) { 553 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 554 } 555 } 556 noinsert2:; 557 ailen[row] = nrow; 558 PetscFunctionReturn(PETSC_SUCCESS); 559 } 560 561 /* 562 This routine could be optimized by removing the need for the block copy below and passing stride information 563 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 564 */ 565 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 566 { 567 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 568 const MatScalar *value; 569 MatScalar *barray = baij->barray; 570 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 571 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 572 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 573 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 574 575 PetscFunctionBegin; 576 if (!barray) { 577 PetscCall(PetscMalloc1(bs2, &barray)); 578 baij->barray = barray; 579 } 580 581 if (roworiented) { 582 stepval = (n - 1) * bs; 583 } else { 584 stepval = (m - 1) * bs; 585 } 586 for (i = 0; i < m; i++) { 587 if (im[i] < 0) continue; 588 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" 
PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 589 if (im[i] >= rstart && im[i] < rend) { 590 row = im[i] - rstart; 591 for (j = 0; j < n; j++) { 592 if (im[i] > in[j]) { 593 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 594 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 595 } 596 /* If NumCol = 1 then a copy is not required */ 597 if ((roworiented) && (n == 1)) { 598 barray = (MatScalar *)v + i * bs2; 599 } else if ((!roworiented) && (m == 1)) { 600 barray = (MatScalar *)v + j * bs2; 601 } else { /* Here a copy is required */ 602 if (roworiented) { 603 value = v + i * (stepval + bs) * bs + j * bs; 604 } else { 605 value = v + j * (stepval + bs) * bs + i * bs; 606 } 607 for (ii = 0; ii < bs; ii++, value += stepval) { 608 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 609 } 610 barray -= bs2; 611 } 612 613 if (in[j] >= cstart && in[j] < cend) { 614 col = in[j] - cstart; 615 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 616 } else if (in[j] < 0) { 617 continue; 618 } else { 619 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 620 if (mat->was_assembled) { 621 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 622 623 #if defined(PETSC_USE_CTABLE) 624 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 625 col = col < 1 ? -1 : (col - 1) / bs; 626 #else 627 col = baij->colmap[in[j]] < 1 ? 
-1 : (baij->colmap[in[j]] - 1) / bs; 628 #endif 629 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 630 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 631 col = in[j]; 632 } 633 } else col = in[j]; 634 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 635 } 636 } 637 } else { 638 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 639 if (!baij->donotstash) { 640 if (roworiented) { 641 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 642 } else { 643 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 644 } 645 } 646 } 647 } 648 PetscFunctionReturn(PETSC_SUCCESS); 649 } 650 651 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 652 { 653 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 654 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 655 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 656 657 PetscFunctionBegin; 658 for (i = 0; i < m; i++) { 659 if (idxm[i] < 0) continue; /* negative row */ 660 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 661 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 662 row = idxm[i] - bsrstart; 663 for (j = 0; j < n; j++) { 664 if (idxn[j] < 0) continue; /* negative column */ 665 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 666 if (idxn[j] >= bscstart && idxn[j] < bscend) { 667 col = idxn[j] - bscstart; 668 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + 
i * n + j)); 669 } else { 670 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 671 #if defined(PETSC_USE_CTABLE) 672 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 673 data--; 674 #else 675 data = baij->colmap[idxn[j] / bs] - 1; 676 #endif 677 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 678 else { 679 col = data + idxn[j] % bs; 680 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 681 } 682 } 683 } 684 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 685 } 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 690 { 691 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 692 PetscReal sum[2], *lnorm2; 693 694 PetscFunctionBegin; 695 if (baij->size == 1) { 696 PetscCall(MatNorm(baij->A, type, norm)); 697 } else { 698 if (type == NORM_FROBENIUS) { 699 PetscCall(PetscMalloc1(2, &lnorm2)); 700 PetscCall(MatNorm(baij->A, type, lnorm2)); 701 *lnorm2 = (*lnorm2) * (*lnorm2); 702 lnorm2++; /* squar power of norm(A) */ 703 PetscCall(MatNorm(baij->B, type, lnorm2)); 704 *lnorm2 = (*lnorm2) * (*lnorm2); 705 lnorm2--; /* squar power of norm(B) */ 706 PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 707 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 708 PetscCall(PetscFree(lnorm2)); 709 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 710 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 711 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 712 PetscReal *rsum, vabs; 713 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 714 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 715 MatScalar *v; 716 717 PetscCall(PetscMalloc1(mat->cmap->N, &rsum)); 718 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 719 /* Amat */ 720 v = 
amat->a; 721 jj = amat->j; 722 for (brow = 0; brow < mbs; brow++) { 723 grow = bs * (rstart + brow); 724 nz = amat->i[brow + 1] - amat->i[brow]; 725 for (bcol = 0; bcol < nz; bcol++) { 726 gcol = bs * (rstart + *jj); 727 jj++; 728 for (col = 0; col < bs; col++) { 729 for (row = 0; row < bs; row++) { 730 vabs = PetscAbsScalar(*v); 731 v++; 732 rsum[gcol + col] += vabs; 733 /* non-diagonal block */ 734 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 735 } 736 } 737 } 738 PetscCall(PetscLogFlops(nz * bs * bs)); 739 } 740 /* Bmat */ 741 v = bmat->a; 742 jj = bmat->j; 743 for (brow = 0; brow < mbs; brow++) { 744 grow = bs * (rstart + brow); 745 nz = bmat->i[brow + 1] - bmat->i[brow]; 746 for (bcol = 0; bcol < nz; bcol++) { 747 gcol = bs * garray[*jj]; 748 jj++; 749 for (col = 0; col < bs; col++) { 750 for (row = 0; row < bs; row++) { 751 vabs = PetscAbsScalar(*v); 752 v++; 753 rsum[gcol + col] += vabs; 754 rsum[grow + row] += vabs; 755 } 756 } 757 } 758 PetscCall(PetscLogFlops(nz * bs * bs)); 759 } 760 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 761 *norm = 0.0; 762 for (col = 0; col < mat->cmap->N; col++) { 763 if (rsum[col] > *norm) *norm = rsum[col]; 764 } 765 PetscCall(PetscFree(rsum)); 766 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 767 } 768 PetscFunctionReturn(PETSC_SUCCESS); 769 } 770 771 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 772 { 773 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 774 PetscInt nstash, reallocs; 775 776 PetscFunctionBegin; 777 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 778 779 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 780 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 781 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 782 PetscCall(PetscInfo(mat, "Stash has %" 
PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 783 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 784 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 789 { 790 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 791 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 792 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 793 PetscInt *row, *col; 794 PetscBool other_disassembled; 795 PetscMPIInt n; 796 PetscBool r1, r2, r3; 797 MatScalar *val; 798 799 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 800 PetscFunctionBegin; 801 if (!baij->donotstash && !mat->nooffprocentries) { 802 while (1) { 803 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 804 if (!flg) break; 805 806 for (i = 0; i < n;) { 807 /* Now identify the consecutive vals belonging to the same row */ 808 for (j = i, rstart = row[j]; j < n; j++) { 809 if (row[j] != rstart) break; 810 } 811 if (j < n) ncols = j - i; 812 else ncols = n - i; 813 /* Now assemble all these values with a single function call */ 814 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 815 i = j; 816 } 817 } 818 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 819 /* Now process the block-stash. 
Since the values are stashed column-oriented, 820 set the row-oriented flag to column-oriented, and after MatSetValues() 821 restore the original flags */ 822 r1 = baij->roworiented; 823 r2 = a->roworiented; 824 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 825 826 baij->roworiented = PETSC_FALSE; 827 a->roworiented = PETSC_FALSE; 828 829 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 830 while (1) { 831 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 832 if (!flg) break; 833 834 for (i = 0; i < n;) { 835 /* Now identify the consecutive vals belonging to the same row */ 836 for (j = i, rstart = row[j]; j < n; j++) { 837 if (row[j] != rstart) break; 838 } 839 if (j < n) ncols = j - i; 840 else ncols = n - i; 841 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 842 i = j; 843 } 844 } 845 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 846 847 baij->roworiented = r1; 848 a->roworiented = r2; 849 850 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 851 } 852 853 PetscCall(MatAssemblyBegin(baij->A, mode)); 854 PetscCall(MatAssemblyEnd(baij->A, mode)); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourselves, in order that we may reassemble. 
*/ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 863 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 864 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 865 } 866 867 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 868 PetscCall(MatAssemblyBegin(baij->B, mode)); 869 PetscCall(MatAssemblyEnd(baij->B, mode)); 870 871 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 872 873 baij->rowvalues = NULL; 874 875 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 876 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 877 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 878 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 879 } 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 884 #include <petscdraw.h> 885 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 886 { 887 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 888 PetscInt bs = mat->rmap->bs; 889 PetscMPIInt rank = baij->rank; 890 PetscBool iascii, isdraw; 891 PetscViewer sviewer; 892 PetscViewerFormat format; 893 894 PetscFunctionBegin; 895 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 896 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 897 if (iascii) { 898 PetscCall(PetscViewerGetFormat(viewer, &format)); 899 if (format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed in rank order via the synchronized printer */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored parallel matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals[] holds the bs global point-row indices of the current block row */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one point column of the bs x bs block at a time; 'a' walks the block values */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block columns back to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 owns the gathered copy; view its sequential diagonal block */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Dispatch viewing to the ASCII/draw/socket path or the binary path based on viewer type. */
static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
/* y = A*x for a Hermitian MPISBAIJ matrix: the stored upper triangle supplies the
   "super-diagonal" contribution directly and the "sub-diagonal" contribution via a
   Hermitian-transpose multiply of the off-diagonal block, combined with one scatter. */
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* y = A*x for a symmetric MPISBAIJ matrix; analogous to the Hermitian variant but
   uses a plain transpose multiply for the sub-diagonal contribution. */
static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
/* z = y + A*x for a Hermitian MPISBAIJ matrix; same structure as the plain multiply
   with the diagonal contribution accumulated onto yy. */
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* z = y + A*x for a symmetric MPISBAIJ matrix. */
static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy,
a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale both the diagonal and off-diagonal local blocks by aa. */
static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally-owned global row as point (not block) indices/values, merging the
   pieces stored in the diagonal (A) and off-diagonal (B) blocks into ascending column order.
   Must be paired with MatRestoreRow_MPISBAIJ(). */
static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart =
matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* pvA/pcA etc. are NULLed when the caller does not want values/indices */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of leading B entries whose global block column precedes the diagonal block */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark must be computed here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* End a MatGetRow(); the row data lives in reusable buffers so only the flag is cleared. */
static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Allow MatGetRow() to return only the stored upper-triangular part of rows. */
static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all stored entries; a no-op for real scalars. */
static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode
MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* keep only the real part of every stored entry, in both local blocks */
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* keep only the imaginary part of every stored entry, in both local blocks */
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow - distributed(parallel),
          iscol_local - locally owned (seq)
*/
static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  *flg = PETSC_FALSE;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); /* cannot be a subset */

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  /* work on sorted copies so the subset test is a single merge-like sweep */
  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a parallel submatrix. For the symmetric format only two shapes are supported
   directly: isrow subset-of iscol (principal submatrix) or full column range; otherwise an
   off-diagonal submatrix is built from two triangular pieces (C[0] + C[1]^T), which
   requires isrow and iscol to be disjoint. The gathered ISs are cached on the result
   (composed as "ISAllGather"/"ISAllGather_other") to enable MAT_REUSE_MATRIX. */
static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat       C[2];
  IS        iscol_local, isrow_local;
  PetscInt  csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    isidentity = (PetscBool)(mat->cmap->N == csize_local); /* iscol covers all columns */
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    /* off-diagonal submatrix: combine the two stored-triangle contributions */
    if (call == MAT_INITIAL_MATRIX) {
      IS       intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered index sets on the submatrix for later MAT_REUSE_MATRIX calls */
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored entries in both local blocks, keeping the nonzero pattern. */
static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather nonzero/memory statistics from the two local blocks; for the GLOBAL flags the
   per-rank sums are reduced (max or sum) over the matrix communicator. */
static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ *a
= (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate the diagonal-block statistics ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... and add the off-diagonal-block statistics */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply a MatOption to this matrix, forwarding to the local blocks where appropriate.
   MAT_HERMITIAN/MAT_SYMMETRIC additionally swap the mat-vec kernels under complex scalars. */
static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pattern-related options apply to both local blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): writes the same ignore_ltriangular field as MAT_IGNORE_LOWER_TRIANGULAR —
       confirm this is the intended mapping for the "error" option */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose of a symmetric matrix is itself: duplicate (initial) or copy (reuse). */
static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symmetric diagonal scaling D*A*D: left and right vectors must be identical to
   preserve symmetry; the off-process part of the scaling vector is scattered in. */
static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the
off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state; only the diagonal block carries factorization data here. */
static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

/* Compare the local A and B blocks rank by rank, then AND the results globally so
   every rank returns the same answer. */
static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat           a, b, c, d;
  PetscBool     flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B; falls back to the generic row-by-row copy unless both matrices share
   the same nonzero pattern and copy implementation, in which case the blocks are copied directly. */
static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A, B, str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;

    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*X. SAME_NONZERO_PATTERN uses direct BLAS axpy on the value arrays; SUBSET uses the
   generic path; otherwise a new matrix with the merged pattern is preallocated and filled. */
static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    /* build a replacement matrix with the union pattern, then merge it back into Y */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract sequential submatrices; submatrices whose row and column sets differ are not
   symmetric, so their sbaij-specific ops are disabled. */
static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
  for (i = 0; i < n; i++) {
    PetscCall(ISEqual(irow[i], icol[i], &flg));
    if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I. Makes sure the diagonal block has room for diagonal entries before shifting. */
static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* empty diagonal block: preallocate one block per row, preserving the nonew setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the first locally-missing diagonal entry (as a global block row index in *d). */
static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs; /* convert local block row to global block row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hand out the sequential diagonal block (no copy; caller must not destroy it). */
static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Squeeze explicitly-stored zeros out of both local blocks. */
static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);

/* Function table for MATMPISBAIJ; slots are in the order defined by struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /*
20*/ MatAssemblyBegin_MPISBAIJ, 1823 MatAssemblyEnd_MPISBAIJ, 1824 MatSetOption_MPISBAIJ, 1825 MatZeroEntries_MPISBAIJ, 1826 /* 24*/ NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 NULL, 1831 /* 29*/ MatSetUp_MPI_Hash, 1832 NULL, 1833 NULL, 1834 MatGetDiagonalBlock_MPISBAIJ, 1835 NULL, 1836 /* 34*/ MatDuplicate_MPISBAIJ, 1837 NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 /* 39*/ MatAXPY_MPISBAIJ, 1842 MatCreateSubMatrices_MPISBAIJ, 1843 MatIncreaseOverlap_MPISBAIJ, 1844 MatGetValues_MPISBAIJ, 1845 MatCopy_MPISBAIJ, 1846 /* 44*/ NULL, 1847 MatScale_MPISBAIJ, 1848 MatShift_MPISBAIJ, 1849 NULL, 1850 NULL, 1851 /* 49*/ NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 NULL, 1856 /* 54*/ NULL, 1857 NULL, 1858 MatSetUnfactored_MPISBAIJ, 1859 NULL, 1860 MatSetValuesBlocked_MPISBAIJ, 1861 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1862 NULL, 1863 NULL, 1864 NULL, 1865 NULL, 1866 /* 64*/ NULL, 1867 NULL, 1868 NULL, 1869 NULL, 1870 NULL, 1871 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1872 NULL, 1873 MatConvert_MPISBAIJ_Basic, 1874 NULL, 1875 NULL, 1876 /* 74*/ NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 NULL, 1881 /* 79*/ NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 MatLoad_MPISBAIJ, 1886 /* 84*/ NULL, 1887 NULL, 1888 NULL, 1889 NULL, 1890 NULL, 1891 /* 89*/ NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 /* 94*/ NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 NULL, 1901 /* 99*/ NULL, 1902 NULL, 1903 NULL, 1904 MatConjugate_MPISBAIJ, 1905 NULL, 1906 /*104*/ NULL, 1907 MatRealPart_MPISBAIJ, 1908 MatImaginaryPart_MPISBAIJ, 1909 MatGetRowUpperTriangular_MPISBAIJ, 1910 MatRestoreRowUpperTriangular_MPISBAIJ, 1911 /*109*/ NULL, 1912 NULL, 1913 NULL, 1914 NULL, 1915 MatMissingDiagonal_MPISBAIJ, 1916 /*114*/ NULL, 1917 NULL, 1918 NULL, 1919 NULL, 1920 NULL, 1921 /*119*/ NULL, 1922 NULL, 1923 NULL, 1924 NULL, 1925 NULL, 1926 /*124*/ NULL, 1927 NULL, 1928 NULL, 1929 NULL, 1930 NULL, 1931 /*129*/ NULL, 1932 NULL, 1933 NULL, 1934 NULL, 1935 NULL, 1936 /*134*/ NULL, 1937 NULL, 1938 NULL, 1939 NULL, 1940 NULL, 
1941 /*139*/ MatSetBlockSizes_Default, 1942 NULL, 1943 NULL, 1944 NULL, 1945 NULL, 1946 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1947 NULL, 1948 NULL, 1949 NULL, 1950 NULL, 1951 NULL, 1952 /*150*/ NULL, 1953 MatEliminateZeros_MPISBAIJ, 1954 NULL, 1955 NULL, 1956 NULL, 1957 /*155*/ NULL, 1958 MatCopyHashToXAIJ_MPI_Hash}; 1959 1960 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1961 { 1962 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1963 PetscInt i, mbs, Mbs; 1964 PetscMPIInt size; 1965 1966 PetscFunctionBegin; 1967 if (B->hash_active) { 1968 B->ops[0] = b->cops; 1969 B->hash_active = PETSC_FALSE; 1970 } 1971 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1972 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1973 PetscCall(PetscLayoutSetUp(B->rmap)); 1974 PetscCall(PetscLayoutSetUp(B->cmap)); 1975 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1976 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1977 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1978 1979 mbs = B->rmap->n / bs; 1980 Mbs = B->rmap->N / bs; 1981 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1982 1983 B->rmap->bs = bs; 1984 b->bs2 = bs * bs; 1985 b->mbs = mbs; 1986 b->Mbs = Mbs; 1987 b->nbs = B->cmap->n / bs; 1988 b->Nbs = B->cmap->N / bs; 1989 1990 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1991 b->rstartbs = B->rmap->rstart / bs; 1992 b->rendbs = B->rmap->rend / bs; 1993 1994 b->cstartbs = 
B->cmap->rstart / bs;
  b->cendbs  = B->cmap->rend / bs;

  /* invalidate previously built column map, ghost data and scatters; they are rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));
  PetscCall(VecDestroy(&b->slvec0));
  PetscCall(VecDestroy(&b->slvec0b));
  PetscCall(VecDestroy(&b->slvec1));
  PetscCall(VecDestroy(&b->slvec1a));
  PetscCall(VecDestroy(&b->slvec1b));
  PetscCall(VecScatterDestroy(&b->sMvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* recreate the off-diagonal SeqBAIJ block (empty column space on a single rank) */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* recreate the diagonal SeqSBAIJ block */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetType(b->A, MATSEQSBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));

  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate and fill the matrix from block-CSR input (ii/jj in block indices, optional values V).
   Entries strictly below the block diagonal are counted but dropped (symmetric storage). */
static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cend;
  PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ     = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;        /* local block rows */
  rstart = B->rmap->rstart / bs;   /* first owned global block row */
  cend   = B->cmap->rend / bs;     /* one past the last owned global block column */

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i]; /* number of block entries in this block row */
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
    JJ = jj + ii[i];
    bd = 0; /* entries strictly below the diagonal (assumes columns sorted within the row — TODO confirm) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0; /* entries in the owned (diagonal) column range */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); /* scratch zeros when no values supplied */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries here are locally owned, so skip the off-process stash exchange during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);

   Options Database Key:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`

   Level: beginner

   Note:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
   diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

/* Constructor for MATMPISBAIJ: installs the op table, initializes all cached structures to
   empty, composes the type-specific methods, and marks the matrix symmetric */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* SBAIJ storage is symmetric by construction; for complex scalars it is NOT Hermitian by default */
  B->symmetric                  = PETSC_BOOL3_TRUE;
  B->structurally_symmetric     = PETSC_BOOL3_TRUE;
  B->symmetry_eternal           = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* clamp nonsensical factors back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
   and `MATMPISBAIJ` otherwise.

   Options Database Key:
. -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@
  MatMPISBAIJSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B     - the matrix
. bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
. d_nz  - number of block nonzeros per block row in diagonal portion of local
          submatrix  (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local matrix
          (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
          for the diagonal entry and set a value even if it is zero.
. o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
2269 - o_nnz - array containing the number of nonzeros in the various block rows of the 2270 off-diagonal portion of the local submatrix that is right of the diagonal 2271 (possibly different for each block row) or `NULL`. 2272 2273 Options Database Keys: 2274 + -mat_no_unroll - uses code that does not unroll the loops in the 2275 block calculations (much slower) 2276 - -mat_block_size - size of the blocks to use 2277 2278 Level: intermediate 2279 2280 Notes: 2281 2282 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2283 than it must be used on all processors that share the object for that argument. 2284 2285 If the *_nnz parameter is given then the *_nz parameter is ignored 2286 2287 Storage Information: 2288 For a square global matrix we define each processor's diagonal portion 2289 to be its local rows and the corresponding columns (a square submatrix); 2290 each processor's off-diagonal portion encompasses the remainder of the 2291 local matrix (a rectangular submatrix). 2292 2293 The user can specify preallocated storage for the diagonal part of 2294 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2295 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2296 memory allocation. Likewise, specify preallocated storage for the 2297 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2298 2299 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2300 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2301 You can also run with the option `-info` and look for messages with the string 2302 malloc in them to see if additional memory allocation was needed. 2303 2304 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2305 the figure below we depict these three local rows and all columns (0-11). 2306 2307 .vb 2308 0 1 2 3 4 5 6 7 8 9 10 11 2309 -------------------------- 2310 row 3 |. . . 
d  d  d  o  o  o  o  o  o
   row 4  |.  .  .  d  d  d  o  o  o  o  o  o
   row 5  |.  .  .  d  d  d  o  o  o  o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d matrix is stored in
  `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

  Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
  plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix

  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
/* Public entry point: validates arguments and dispatches to the type-specific implementation
   (no-op for matrix types that do not compose "MatMPISBAIJSetPreallocation_C") */
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
  MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
  (block compressed row).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. bs    - size of block, the blocks are ALWAYS square.
One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular portion of the diagonal portion of the local matrix
          (possibly different for each block row) or `NULL`.
          If you plan to factor the matrix you must leave room for the diagonal entry and
          set its value even if it is zero.
. o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_unroll - uses code that does not unroll the loops in the
                   block calculations (much slower)
.
-mat_block_size - size of the blocks to use 2381 - -mat_mpi - use the parallel matrix data structures even on one processor 2382 (defaults to using SeqBAIJ format on one processor) 2383 2384 Level: intermediate 2385 2386 Notes: 2387 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2388 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2389 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2390 2391 The number of rows and columns must be divisible by blocksize. 2392 This matrix type does not support complex Hermitian operation. 2393 2394 The user MUST specify either the local or global matrix dimensions 2395 (possibly both). 2396 2397 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2398 than it must be used on all processors that share the object for that argument. 2399 2400 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2401 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2402 2403 If the *_nnz parameter is given then the *_nz parameter is ignored 2404 2405 Storage Information: 2406 For a square global matrix we define each processor's diagonal portion 2407 to be its local rows and the corresponding columns (a square submatrix); 2408 each processor's off-diagonal portion encompasses the remainder of the 2409 local matrix (a rectangular submatrix). 2410 2411 The user can specify preallocated storage for the diagonal part of 2412 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2413 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2414 memory allocation. Likewise, specify preallocated storage for the 2415 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 
2416 2417 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2418 the figure below we depict these three local rows and all columns (0-11). 2419 2420 .vb 2421 0 1 2 3 4 5 6 7 8 9 10 11 2422 -------------------------- 2423 row 3 |. . . d d d o o o o o o 2424 row 4 |. . . d d d o o o o o o 2425 row 5 |. . . d d d o o o o o o 2426 -------------------------- 2427 .ve 2428 2429 Thus, any entries in the d locations are stored in the d (diagonal) 2430 submatrix, and any entries in the o locations are stored in the 2431 o (off-diagonal) submatrix. Note that the d matrix is stored in 2432 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2433 2434 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2435 plus the diagonal part of the d matrix, 2436 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2437 In general, for PDE problems in which most nonzeros are near the diagonal, 2438 one expects `d_nz` >> `o_nz`. 
.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
/* Convenience constructor: MATMPISBAIJ on more than one rank, MATSEQSBAIJ otherwise */
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPISBAIJ matrix, copying layouts, cached block-size bookkeeping, the column map,
   ghost arrays and work vectors; values are copied or not according to cpvalues */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (matin->hash_active) {
    /* source is still in hash-assembly mode; the duplicate just gets set up the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a      = (Mat_MPISBAIJ *)mat->data;
    a->bs2 = oldmat->bs2;
    a->mbs =
oldmat->mbs;
    a->nbs = oldmat->nbs;
    a->Mbs = oldmat->Mbs;
    a->Nbs = oldmat->Nbs;

    /* copy communicator-layout bookkeeping and per-call flags from the source matrix */
    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL; /* MatGetRow() work arrays are allocated lazily, not copied */
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: table storage is not duplicated, only the tuning parameters */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
    /* duplicate the global-to-local column map of the off-diagonal part, if present */
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* garray holds the global block-column indices of B; length is B's block-column count */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
    PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

    /* slvec1a/slvec1b are views into slvec1's storage: the first bs*mbs entries and the remainder */
    PetscCall(VecGetLocalSize(a->slvec1, &nt));
    PetscCall(VecGetArray(a->slvec1, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
    PetscCall(VecRestoreArray(a->slvec1, &array));

    /* slvec0b is a view of the tail of slvec0's storage */
    PetscCall(VecGetArray(a->slvec0, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
    PetscCall(VecRestoreArray(a->slvec0, &array));

    /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
    PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
    a->sMvctx = oldmat->sMvctx;

    /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks */
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load an MPISBAIJ matrix from a viewer; only binary viewers are supported (checked below). */
static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRowMaxAbs_MPISBAIJ - fills v with the max absolute value in each row.
  The idx output (column of the max) is not implemented for this type (checked below).
  Because only the upper triangle is stored, off-diagonal contributions must also be
  accumulated into the owning process of the transposed (column) position; see the
  send/receive logic in the body.
*/
static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the local diagonal block A */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B: walk every stored block of the off-diagonal part, updating both the
     local row maximum (va) and the transposed, column-owner contribution (work).
     NOTE(review): the last rank is skipped — presumably its B part is empty in this
     upper-triangular layout; confirm against the SBAIJ storage convention. */
  if (rank != size - 1) {
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0]; /* blocks in block-row i */
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba); /* ba walks the block values in storage order */
            ba++;
            row = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners: each later rank receives the slice of work it owns */
    for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values: one message from every lower rank, merged by taking the maximum */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ; only block size 1 is supported
  (checked below). Supported flags: SOR_APPLY_UPPER, local symmetric sweeps,
  single local forward/backward sweeps with zero initial guess, and SOR_EISENSTAT;
  anything else errors out at the end of the function.
*/
static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      /* scatter is started here and finished after the array restores, overlapping work */
      PetscCall(VecScatterBegin(mat->sMvctx,
mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    /* work vectors are cached on the matrix and created on first use */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      /* NOTE(review): the product goes into bb1 but the AYPX below reads slvec1a —
         looks inconsistent with the else-branch, which writes slvec1a; verify upstream */
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* scale == -1 in this case, so b - diag*x avoids the extra multiply */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format
for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* validate CSR input before creating anything */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the block size
. i  - the indices into `j` for the start of each local row (indices start with zero)
. j  - the column indices for each local row (indices start with zero) these must be sorted for each row
- v  - optional values in the matrix, pass `NULL` if not provided

  Level: advanced

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine.
2849 2850 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2851 and usually the numerical values as well 2852 2853 Any entries passed in that are below the diagonal are ignored 2854 2855 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2856 `MatCreateMPISBAIJWithArrays()` 2857 @*/ 2858 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2859 { 2860 PetscFunctionBegin; 2861 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2862 PetscFunctionReturn(PETSC_SUCCESS); 2863 } 2864 2865 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2866 { 2867 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2868 PetscInt *indx; 2869 PetscScalar *values; 2870 2871 PetscFunctionBegin; 2872 PetscCall(MatGetSize(inmat, &m, &N)); 2873 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2874 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2875 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2876 PetscInt *bindx, rmax = a->rmax, j; 2877 PetscMPIInt rank, size; 2878 2879 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2880 mbs = m / bs; 2881 Nbs = N / cbs; 2882 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2883 nbs = n / cbs; 2884 2885 PetscCall(PetscMalloc1(rmax, &bindx)); 2886 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2887 2888 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2889 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2890 if (rank == size - 1) { 2891 /* Check sum(nbs) = Nbs */ 2892 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" 
PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2893 } 2894 2895 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2896 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2897 for (i = 0; i < mbs; i++) { 2898 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2899 nnz = nnz / bs; 2900 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2901 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2902 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2903 } 2904 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2905 PetscCall(PetscFree(bindx)); 2906 2907 PetscCall(MatCreate(comm, outmat)); 2908 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2909 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2910 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2911 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2912 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2913 MatPreallocateEnd(dnz, onz); 2914 } 2915 2916 /* numeric phase */ 2917 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2918 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2919 2920 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2921 for (i = 0; i < m; i++) { 2922 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2923 Ii = i + rstart; 2924 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2925 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2926 } 2927 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2928 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2929 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2930 PetscFunctionReturn(PETSC_SUCCESS); 2931 } 2932