#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
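  /* nm and fill must be the same on every rank; the MATPREALLOCATOR built below is a collective object */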
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
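    /* in-place conversion: A takes over the newly built B (type, data, state) and the temporary B is destroyed */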
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
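      /* matching block column found: the values were updated in place above, so skip the insertion path */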
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij   = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
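     (a process disassembles when new off-process entries change the nonzero structure of its off-diagonal block B)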
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
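    /* ASCII, draw, and socket viewers share the single implementation above */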
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
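       The work arrays mat->rowvalues/mat->rowindices are reused by subsequent calls
       and released in MatAssemblyEnd_MPISBAIJ() and MatDestroy_MPISBAIJ().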
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
1316 } 1317 1318 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1319 { 1320 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatRealPart(a->A)); 1324 PetscCall(MatRealPart(a->B)); 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1329 { 1330 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1331 1332 PetscFunctionBegin; 1333 PetscCall(MatImaginaryPart(a->A)); 1334 PetscCall(MatImaginaryPart(a->B)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1339 Input: isrow - distributed(parallel), 1340 iscol_local - locally owned (seq) 1341 */ 1342 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1343 { 1344 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1345 const PetscInt *ptr1, *ptr2; 1346 1347 PetscFunctionBegin; 1348 *flg = PETSC_FALSE; 1349 PetscCall(ISGetLocalSize(isrow, &sz1)); 1350 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1351 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1352 1353 PetscCall(ISGetIndices(isrow, &ptr1)); 1354 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1355 1356 PetscCall(PetscMalloc1(sz1, &a1)); 1357 PetscCall(PetscMalloc1(sz2, &a2)); 1358 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1359 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1360 PetscCall(PetscSortInt(sz1, a1)); 1361 PetscCall(PetscSortInt(sz2, a2)); 1362 1363 nmatch = 0; 1364 k = 0; 1365 for (i = 0; i < sz1; i++) { 1366 for (j = k; j < sz2; j++) { 1367 if (a1[i] == a2[j]) { 1368 k = j; 1369 nmatch++; 1370 break; 1371 } 1372 } 1373 } 1374 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1375 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1376 PetscCall(PetscFree(a1)); 1377 PetscCall(PetscFree(a2)); 1378 if (nmatch < sz1) { 1379 *flg = PETSC_FALSE; 1380 } else { 1381 *flg = PETSC_TRUE; 1382 } 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1387 { 1388 Mat C[2]; 1389 IS iscol_local, isrow_local; 1390 PetscInt csize, csize_local, rsize; 1391 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1392 1393 PetscFunctionBegin; 1394 PetscCall(ISGetLocalSize(iscol, &csize)); 1395 PetscCall(ISGetLocalSize(isrow, &rsize)); 1396 if (call == MAT_REUSE_MATRIX) { 1397 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1398 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1399 } else { 1400 PetscCall(ISAllGather(iscol, &iscol_local)); 1401 PetscCall(ISSorted(iscol_local, &issorted)); 1402 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1403 } 1404 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1405 if (!isequal) { 1406 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1407 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1408 if (!isidentity) { 1409 if (call == MAT_REUSE_MATRIX) { 1410 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1411 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1412 } else { 1413 PetscCall(ISAllGather(isrow, &isrow_local)); 1414 PetscCall(ISSorted(isrow_local, &issorted)); 1415 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For 
symmetric format, isrow must be sorted"); 1416 } 1417 } 1418 } 1419 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1420 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1421 if (!isequal && !isidentity) { 1422 if (call == MAT_INITIAL_MATRIX) { 1423 IS intersect; 1424 PetscInt ni; 1425 1426 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1427 PetscCall(ISGetLocalSize(intersect, &ni)); 1428 PetscCall(ISDestroy(&intersect)); 1429 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1430 } 1431 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1432 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1433 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1434 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1435 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1436 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1437 PetscCall(MatDestroy(C)); 1438 PetscCall(MatDestroy(C + 1)); 1439 } 1440 if (call == MAT_INITIAL_MATRIX) { 1441 if (!isequal && !isidentity) { 1442 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1443 PetscCall(ISDestroy(&isrow_local)); 1444 } 1445 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1446 PetscCall(ISDestroy(&iscol_local)); 1447 } 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1452 { 1453 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1454 1455 PetscFunctionBegin; 1456 PetscCall(MatZeroEntries(l->A)); 1457 PetscCall(MatZeroEntries(l->B)); 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1462 { 1463 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1464 Mat A = a->A, B = a->B; 1465 PetscLogDouble isend[5], irecv[5]; 1466 1467 PetscFunctionBegin; 1468 info->block_size = (PetscReal)matin->rmap->bs; 1469 1470 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1471 1472 isend[0] = info->nz_used; 1473 isend[1] = info->nz_allocated; 1474 isend[2] = info->nz_unneeded; 1475 isend[3] = info->memory; 1476 isend[4] = info->mallocs; 1477 1478 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1479 1480 isend[0] += info->nz_used; 1481 isend[1] += info->nz_allocated; 1482 isend[2] += info->nz_unneeded; 1483 isend[3] += info->memory; 1484 isend[4] += info->mallocs; 1485 if (flag == MAT_LOCAL) { 1486 info->nz_used = isend[0]; 1487 info->nz_allocated = isend[1]; 1488 info->nz_unneeded = isend[2]; 1489 info->memory = isend[3]; 1490 info->mallocs = isend[4]; 1491 } else if (flag == MAT_GLOBAL_MAX) { 1492 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1493 1494 info->nz_used = irecv[0]; 1495 info->nz_allocated = irecv[1]; 1496 info->nz_unneeded = irecv[2]; 1497 info->memory = irecv[3]; 1498 info->mallocs = irecv[4]; 1499 } else if (flag == MAT_GLOBAL_SUM) { 1500 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, 

static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
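
/* Sketch of querying these statistics from user code (standard MatGetInfo() API;
   an assembled matrix A is assumed):

     MatInfo info;
     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g of %g allocated\n", info.nz_used, info.nz_allocated));
*/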

static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
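
/* With complex scalars an MPISBAIJ matrix is symmetric by default, not Hermitian;
   callers that store a Hermitian matrix must flag it explicitly so the Hermitian
   mat-vec kernels above are installed (sketch, assuming an assembled matrix A):

     PetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE));
*/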

static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat           a, b, c, d;
  PetscBool     flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A, B, str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;

    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
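
/* MatAXPY() computes Y = a*X + Y; the three branches above trade generality for
   speed depending on how the nonzero patterns of X and Y relate (sketch):

     PetscCall(MatAXPY(Y, alpha, X, SAME_NONZERO_PATTERN));      // direct BLAS axpy on the stored values
     PetscCall(MatAXPY(Y, alpha, X, SUBSET_NONZERO_PATTERN));    // row-by-row insertion into Y
     PetscCall(MatAXPY(Y, alpha, X, DIFFERENT_NONZERO_PATTERN)); // Y is rebuilt with a merged pattern
*/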

static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
  for (i = 0; i < n; i++) {
    PetscCall(ISEqual(irow[i], icol[i], &flg));
    if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}
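
/* MatShift_MPISBAIJ() above adds a multiple of the identity, Y = Y + a*I, after
   making sure the diagonal blocks have storage. A typical call (sketch; the shift
   value is a caller choice, e.g. to regularize a nearly singular matrix):

     PetscCall(MatShift(A, (PetscScalar)1.0e-8));
*/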

static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPISBAIJ};

static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
  PetscInt      i, mbs, Mbs;
  PetscMPIInt   size;

  PetscFunctionBegin;
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
  PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);

  mbs = B->rmap->n / bs;
  Mbs = B->rmap->N / bs;
  PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);

  B->rmap->bs = bs;
  b->bs2      = bs * bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n / bs;
  b->Nbs      = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;

  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));
  PetscCall(VecDestroy(&b->slvec0));
  PetscCall(VecDestroy(&b->slvec0b));
  PetscCall(VecDestroy(&b->slvec1));
  PetscCall(VecDestroy(&b->slvec1a));
  PetscCall(VecDestroy(&b->slvec1b));
  PetscCall(VecScatterDestroy(&b->sMvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetType(b->A, MATSEQSBAIJ));

  PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));

  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cend;
  PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions */
    JJ = jj + ii[i];
    bd = 0;
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
  based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
  the matrix is stored.

  For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
  can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);

  Options Database Key:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`

  Level: beginner

  Note:
  The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
  diagonal portion of the matrix of each process has to be less than or equal to the number of columns.

.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
  MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

  This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
  and `MATMPISBAIJ` otherwise.

  Options Database Key:
. -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
M*/

/*@C
  MatMPISBAIJSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B     - the matrix
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. d_nz  - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
          for the diagonal entry and set a value even if it is zero.
. o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix that is right of the diagonal
          (possibly different for each block row) or `NULL`.

  Options Database Keys:
+ -mat_no_unroll  - uses code that does not unroll the loops in the
                    block calculations (much slower)
- -mat_block_size - size of the blocks to use

  Level: intermediate

  Notes:
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If the *_nnz parameter is given then the *_nz parameter is ignored.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d matrix is stored in
  `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

  Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
  plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix.

  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.
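
  Example:
  If, in the figure above, every depicted d and o location were nonzero and bs = 1,
  the owning process could preallocate its three rows with (a sketch; the arrays
  give one count per local block row)
.vb
   PetscInt d_nnz[] = {3, 2, 1};
   PetscInt o_nnz[] = {6, 6, 6};
   MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve
  since only the upper triangle (diagonal included) of the d block is stored.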

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
  MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
  (block compressed row). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular portion of the diagonal portion of the local submatrix
          (possibly different for each block row) or `NULL`.
          If you plan to factor the matrix you must leave room for the diagonal entry and
          set its value even if it is zero.
. o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_unroll  - uses code that does not unroll the loops in the
                    block calculations (much slower)
. -mat_block_size - size of the blocks to use
- -mat_mpi        - use the parallel matrix data structures even on one processor
                    (defaults to using SeqBAIJ format on one processor)

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  The number of rows and columns must be divisible by blocksize.
  This matrix type does not support complex Hermitian operations.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If the *_nnz parameter is given then the *_nz parameter is ignored.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d matrix is stored in
  `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

  Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
  plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.
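
  Example:
  With bs = 1 and every depicted d and o location nonzero, the process owning rows
  3-5 above could create its share of the matrix with (a sketch)
.vb
   PetscInt d_nnz[] = {3, 2, 1};
   PetscInt o_nnz[] = {6, 6, 6};
   MatCreateSBAIJ(comm, 1, 3, 3, M, M, 0, d_nnz, 0, o_nnz, &A);
.ve
  where only the upper triangle (diagonal included) of the d block is counted.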

.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
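
/* Loading sketch (standard viewer calls; the file name is a hypothetical example):

     Mat         A;
     PetscViewer v;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "A.dat", FILE_MODE_READ, &v));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPISBAIJ));
     PetscCall(MatLoad(A, v));
     PetscCall(PetscViewerDestroy(&v));
*/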

PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) {
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
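
/* Sketch of the public entry point: v must conform to the row layout of A, and the
   idx argument is not supported by this implementation, so pass NULL:

     Vec v;
     PetscCall(MatCreateVecs(A, NULL, &v));
     PetscCall(MatGetRowMaxAbs(A, v, NULL));
*/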

PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}
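
/* This kernel is normally reached through KSP/PC rather than called directly; a
   sketch of the standard options that exercise the branches above:

     -pc_type sor -pc_sor_symmetric      for the local symmetric sweeps
     -pc_type eisenstat                  for the SOR_EISENSTAT branch
*/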

/*@
  MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
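
  Example:
  A hypothetical single-process 2 x 2 matrix with bs = 1 whose stored upper triangle
  has entries 2, -1, 2 would be passed as (a sketch)
.vb
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 1, 1};
   PetscScalar a[] = {2.0, -1.0, 2.0};
   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 2, 2, i, j, a, &mat);
.ve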

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the block size
. i  - the indices into `j` for the start of each local row (starts with zero)
. j  - the column indices for each local row (starts with zero); these must be sorted for each row
- v  - optional values in the matrix

  Level: advanced

  Notes:
  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
  and usually the numerical values as well.

  Any entries below the diagonal are ignored.

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
    PetscInt     *dnz, *onz, mbs, Nbs, nbs;
    PetscInt     *bindx, rmax = a->rmax, j;
    PetscMPIInt   rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}