#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

/*
  MatDestroy_MPISBAIJ - Frees everything owned by a parallel symmetric block AIJ matrix:
  the sequential diagonal (A) and off-diagonal (B) blocks, the column map, the gathered
  column array, all scatter contexts and work vectors, and the per-type composed methods.
*/
static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  /* slvec0/slvec1 and sMvctx are the "split" vectors/scatter used by the SBAIJ mat-vec */
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* reset the type name and drop all composed methods so the Mat header can be re-typed */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
/*
  MatPreallocateWithMats_Private - Preallocates B using the union of the nonzero
  patterns of the matrices X[0..nm-1]; when symm[i] is true the transpose pattern of
  X[i] is added as well.  All X[i] must share B's row layout.  Uses a MATPREALLOCATOR
  scratch matrix; 'fill' is forwarded to MatPreallocatorPreallocate().
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  /* scratch MATPREALLOCATOR with the same layout/block size as B */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      /* symm[i]: also record the transposed entries so the full symmetric pattern is counted */
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvert_MPISBAIJ_Basic - Fallback conversion from MPISBAIJ to another type by
  expanding the stored upper triangle into a full (symmetric/Hermitian) matrix via
  MatGetRow()/MatSetValues().  Handles MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
  MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      /* sparse target: preallocate with A's pattern plus its transpose (symm = PETSC_TRUE) */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      /* Hermitian: mirror with conjugated values */
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Save a copy of the current numerical values of both local blocks (see MatStoreValues()) */
static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the numerical values previously saved with MatStoreValues_MPISBAIJ() */
static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatSetValues_SeqSBAIJ_A_Private - macro that inserts/adds one scalar into the
  sequential diagonal block A at point-index (row,col).  Expects brow, bcol, rp, ap,
  rmax, nrow, ridx, cidx, low, high, t, _i, N, bap and the a*/aa/ai/aj/aimax/ailen
  aliases to be declared by the caller (see MatSetValues_MPISBAIJ).  (orow,ocol) are
  the original global indices, used only in the error message.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search down to a window of <= 3, then scan linearly */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

/*
  MatSetValues_SeqSBAIJ_B_Private - same as the A variant above but targets the
  off-diagonal block B (aliases b/ba/bi/bj/bimax/bilen) and label b_noinsert.
*/
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to stash off-process rows, if needed */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row: skip */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global block column through the colmap (1-based, 0 == absent) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              /* new off-process block column: must disassemble and fall back to global indexing */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatSetValuesBlocked_SeqSBAIJ_Inlined - inserts/adds one bs x bs block of values into
  the sequential SBAIJ matrix A at block position (row,col); only the upper triangle
  is stored, so col < row is rejected (or silently ignored when ignore_ltriangular is
  set).  (orow,ocol) are the caller's original global block indices for error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
    PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search for the block column, then linear scan of the remaining window */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      /* blocks are stored column-major; row-oriented input walks with stride bs */
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  This routine is exactly duplicated in mpibaij.c
*/
/*
  MatSetValuesBlocked_SeqBAIJ_Inlined - inserts/adds one bs x bs block into the
  sequential BAIJ matrix A at block position (row,col).  Unlike the SBAIJ variant
  there is no triangular restriction (used for the off-diagonal block).
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
/*
  MatSetValuesBlocked_MPISBAIJ - MatSetValuesBlocked() implementation: routes each
  bs x bs block either into the local A/B blocks or, for off-process rows, into the
  block stash.  Lower-triangular blocks (im[i] > in[j]) are rejected or ignored.
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij   = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily create the bs2 scratch block used to repack strided input */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        /* If NumCol = 1 then a copy is not required */
        if (roworiented && n == 1) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatGetValues_MPISBAIJ - retrieves entries with locally owned row indices only;
  diagonal-block columns come from A, others are looked up through the colmap into B
  (absent entries read as 0.0).
*/
static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        /* colmap miss (or stale entry) means the element is structurally zero */
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPISBAIJ - matrix norms for the symmetric storage format.  Only the upper
  triangle is stored, so off-diagonal contributions are counted twice (Frobenius:
  sqrt(||A||^2 + 2*||B||^2); 1-/inf-norm: each stored entry contributes to both its
  row and its column sum).
*/
static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square power of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square power of norm(B) */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* B entries represent both (i,j) and (j,i), hence the factor 2 */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Completes assembly of an MPISBAIJ matrix: drains the scalar and block stashes of
   off-process entries into the local parts (A = diagonal SBAIJ block, B = off-diagonal
   BAIJ block), collectively handles disassembly/reassembly, and updates the global
   nonzero state. Collective over the matrix's communicator. */
static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     all_assembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: receive messages of (row,col,val) triples from other ranks */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block is bs2 scalars, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* all_assembled is true only if every rank was previously assembled */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  /* cached MatGetRow() workspace is invalidated by the new assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
/* Viewer backend for ASCII/draw/socket viewers. For ASCII info formats it prints
   per-rank statistics; otherwise the entire matrix is gathered onto rank 0 as a new
   MPISBAIJ matrix and viewed sequentially there. Collective. */
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time (values stored column-oriented) */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns of B to global block columns */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Dispatches MatView() to the appropriate backend based on the viewer type. */
static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
/* y = A*x for a Hermitian MPISBAIJ matrix: only the upper triangle is stored, so the
   lower-triangle contribution is obtained via B's Hermitian transpose. */
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* single scatter accumulates both the local x copy and the B^H x contribution */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* y = A*x for a (real-)symmetric MPISBAIJ matrix. Only the upper triangle is stored:
   the diagonal block multiplies directly, the lower-triangle ("subdiagonal")
   contribution comes from B^T x, and the upper off-diagonal part from B applied to
   the scattered ghost values. */
static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
/* z = y + A*x for a Hermitian MPISBAIJ matrix; same structure as
   MatMult_MPISBAIJ_Hermitian() with the add folded into the diagonal and final steps. */
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/* z = y + A*x for a symmetric MPISBAIJ matrix; same structure as MatMult_MPISBAIJ()
   with the add folded in. */
static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy,
a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales the matrix by aa; applied independently to both local parts. */
static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns one locally owned (global) row by merging the corresponding rows of the
   diagonal (A) and off-diagonal (B) parts into a single column-sorted row.
   NOTE(review): for SBAIJ the A part only stores the upper triangle, so the row
   returned reflects the stored (upper-triangular) entries — see
   MatGetRowUpperTriangular_MPISBAIJ below. */
static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* pass NULL for values/indices the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* imark was not computed in the v branch; find it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPISBAIJ(); only clears the active flag, since the
   workspace is cached on the matrix for reuse. */
static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Marks the diagonal SBAIJ part so MatGetRow() returns only stored (upper-triangular) entries. */
static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Clears the flag set by MatGetRowUpperTriangular_MPISBAIJ(). */
static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugates all entries; no-op in real builds. */
static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode
MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* keep only the real part of every entry, in both local parts */
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replaces every entry by its imaginary part, in both local parts. */
static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
*/
static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  *flg = PETSC_FALSE;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  /* work on sorted copies so the subset test is a linear merge */
  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extracts a submatrix. For the symmetric format the stored upper triangle alone is
   not enough for a general (isrow != iscol) submatrix, so an off-diagonal request is
   built as C[0] + C[1]^T from two BAIJ extractions; the gathered index sets are
   composed on the result so MAT_REUSE_MATRIX can find them again. */
static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat       C[2];
  IS        iscol_local, isrow_local;
  PetscInt  csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was stashed on the submatrix in the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    isidentity = (PetscBool)(mat->cmap->N == csize_local);
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    if (call == MAT_INITIAL_MATRIX) {
      IS       intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    /* combine the transposed complementary extraction to recover the unstored triangle */
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    /* stash gathered index sets on the submatrix for later MAT_REUSE_MATRIX calls */
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all entries without changing the nonzero structure. */
static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gathers matrix statistics: sums the local A and B contributions, then reduces
   (MAX or SUM) across the communicator depending on the requested flag. */
static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ *a
= (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* accumulate the off-diagonal part on top of the diagonal part */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sets a matrix option; most options are forwarded to both local parts.
   In complex builds MAT_HERMITIAN/MAT_SYMMETRIC also rewire the mat-vec ops
   between the Hermitian and plain-transpose kernels. */
static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  /* the following options all simply forward to both local parts */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose of a symmetric matrix is itself: duplicate or copy A into B. */
static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symmetric diagonal scaling; requires ll == rr semantics (square local sizes).
   The scatter of rr into the ghost vector is overlapped with the left scaling of B. */
static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;

  PetscFunctionBegin;
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clears the factored state of the diagonal part. */
static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool
*flag) 1619 { 1620 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1621 Mat a, b, c, d; 1622 PetscBool flg; 1623 1624 PetscFunctionBegin; 1625 a = matA->A; 1626 b = matA->B; 1627 c = matB->A; 1628 d = matB->B; 1629 1630 PetscCall(MatEqual(a, c, &flg)); 1631 if (flg) PetscCall(MatEqual(b, d, &flg)); 1632 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1633 PetscFunctionReturn(PETSC_SUCCESS); 1634 } 1635 1636 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1637 { 1638 PetscBool isbaij; 1639 1640 PetscFunctionBegin; 1641 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1642 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1643 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1644 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1645 PetscCall(MatGetRowUpperTriangular(A)); 1646 PetscCall(MatCopy_Basic(A, B, str)); 1647 PetscCall(MatRestoreRowUpperTriangular(A)); 1648 } else { 1649 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1650 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1651 1652 PetscCall(MatCopy(a->A, b->A, str)); 1653 PetscCall(MatCopy(a->B, b->B, str)); 1654 } 1655 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1660 { 1661 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1662 PetscBLASInt bnz, one = 1; 1663 Mat_SeqSBAIJ *xa, *ya; 1664 Mat_SeqBAIJ *xb, *yb; 1665 1666 PetscFunctionBegin; 1667 if (str == SAME_NONZERO_PATTERN) { 1668 PetscScalar alpha = a; 1669 xa = (Mat_SeqSBAIJ *)xx->A->data; 1670 ya = (Mat_SeqSBAIJ *)yy->A->data; 1671 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1672 
PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1673 xb = (Mat_SeqBAIJ *)xx->B->data; 1674 yb = (Mat_SeqBAIJ *)yy->B->data; 1675 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1676 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1677 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1678 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1679 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1680 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1681 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1682 } else { 1683 Mat B; 1684 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1685 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1686 PetscCall(MatGetRowUpperTriangular(X)); 1687 PetscCall(MatGetRowUpperTriangular(Y)); 1688 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1689 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1690 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1691 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1692 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1693 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1694 PetscCall(MatSetType(B, MATMPISBAIJ)); 1695 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1696 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1697 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1698 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1699 PetscCall(MatHeaderMerge(Y, &B)); 1700 PetscCall(PetscFree(nnz_d)); 1701 PetscCall(PetscFree(nnz_o)); 1702 PetscCall(MatRestoreRowUpperTriangular(X)); 1703 PetscCall(MatRestoreRowUpperTriangular(Y)); 1704 } 1705 PetscFunctionReturn(PETSC_SUCCESS); 1706 } 1707 1708 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], 
                                                    MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
  for (i = 0; i < n; i++) {
    PetscCall(ISEqual(irow[i], icol[i], &flg));
    /* a submatrix with different row and column index sets is not symmetric, so
       disable the sbaij-specific (symmetry-assuming) operations on it */
    if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Adds a to every diagonal entry of Y, preallocating one diagonal block per
   row first if the matrix has no storage yet. */
static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* diagonal block is empty: re-preallocate it but preserve the user's nonew setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns (without copying) the local diagonal block of A. */
static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drops explicitly stored zeros from both local blocks of A. */
static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));      // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);

/* Dispatch table for MATMPISBAIJ; the index comments give the slot numbers in
   struct _MatOps — entry ORDER is the contract, do not reorder.
   The table continues (entries 109 onward) in the next chunk. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPISBAIJ,
                                       /* 69*/ NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
1869 /*109*/ NULL, 1870 NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 /*114*/ NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 /*119*/ NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 NULL, 1884 /*124*/ NULL, 1885 NULL, 1886 MatSetBlockSizes_Default, 1887 NULL, 1888 NULL, 1889 /*129*/ NULL, 1890 MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1891 NULL, 1892 NULL, 1893 NULL, 1894 /*134*/ NULL, 1895 NULL, 1896 MatEliminateZeros_MPISBAIJ, 1897 NULL, 1898 NULL, 1899 /*139*/ NULL, 1900 NULL, 1901 MatCopyHashToXAIJ_MPI_Hash, 1902 NULL, 1903 NULL}; 1904 1905 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1906 { 1907 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1908 PetscInt i, mbs, Mbs; 1909 PetscMPIInt size; 1910 1911 PetscFunctionBegin; 1912 if (B->hash_active) { 1913 B->ops[0] = b->cops; 1914 B->hash_active = PETSC_FALSE; 1915 } 1916 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1917 PetscCall(MatSetBlockSize(B, bs)); 1918 PetscCall(PetscLayoutSetUp(B->rmap)); 1919 PetscCall(PetscLayoutSetUp(B->cmap)); 1920 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1921 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1922 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1923 1924 mbs = B->rmap->n / bs; 1925 Mbs = B->rmap->N / bs; 1926 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1927 1928 B->rmap->bs = bs; 1929 b->bs2 = bs * bs; 1930 b->mbs = mbs; 1931 b->Mbs = Mbs; 1932 b->nbs = B->cmap->n / bs; 1933 b->Nbs = 
B->cmap->N / bs; 1934 1935 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1936 b->rstartbs = B->rmap->rstart / bs; 1937 b->rendbs = B->rmap->rend / bs; 1938 1939 b->cstartbs = B->cmap->rstart / bs; 1940 b->cendbs = B->cmap->rend / bs; 1941 1942 #if defined(PETSC_USE_CTABLE) 1943 PetscCall(PetscHMapIDestroy(&b->colmap)); 1944 #else 1945 PetscCall(PetscFree(b->colmap)); 1946 #endif 1947 PetscCall(PetscFree(b->garray)); 1948 PetscCall(VecDestroy(&b->lvec)); 1949 PetscCall(VecScatterDestroy(&b->Mvctx)); 1950 PetscCall(VecDestroy(&b->slvec0)); 1951 PetscCall(VecDestroy(&b->slvec0b)); 1952 PetscCall(VecDestroy(&b->slvec1)); 1953 PetscCall(VecDestroy(&b->slvec1a)); 1954 PetscCall(VecDestroy(&b->slvec1b)); 1955 PetscCall(VecScatterDestroy(&b->sMvctx)); 1956 1957 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1958 1959 MatSeqXAIJGetOptions_Private(b->B); 1960 PetscCall(MatDestroy(&b->B)); 1961 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1962 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1963 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1964 MatSeqXAIJRestoreOptions_Private(b->B); 1965 1966 MatSeqXAIJGetOptions_Private(b->A); 1967 PetscCall(MatDestroy(&b->A)); 1968 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1969 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1970 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1971 MatSeqXAIJRestoreOptions_Private(b->A); 1972 1973 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1974 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1975 1976 B->preallocated = PETSC_TRUE; 1977 B->was_assembled = PETSC_FALSE; 1978 B->assembled = PETSC_FALSE; 1979 PetscFunctionReturn(PETSC_SUCCESS); 1980 } 1981 1982 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1983 { 1984 PetscInt m, rstart, cend; 1985 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1986 const PetscInt *JJ = NULL; 1987 PetscScalar *values = NULL; 1988 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1989 PetscBool nooffprocentries; 1990 1991 PetscFunctionBegin; 1992 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1993 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1994 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1995 PetscCall(PetscLayoutSetUp(B->rmap)); 1996 PetscCall(PetscLayoutSetUp(B->cmap)); 1997 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1998 m = B->rmap->n / bs; 1999 rstart = B->rmap->rstart / bs; 2000 cend = B->cmap->rend / bs; 2001 2002 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2003 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2004 for (i = 0; i < m; i++) { 2005 nz = ii[i + 1] - ii[i]; 2006 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local 
row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2007 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2008 JJ = jj + ii[i]; 2009 bd = 0; 2010 for (j = 0; j < nz; j++) { 2011 if (*JJ >= i + rstart) break; 2012 JJ++; 2013 bd++; 2014 } 2015 d = 0; 2016 for (; j < nz; j++) { 2017 if (*JJ++ >= cend) break; 2018 d++; 2019 } 2020 d_nnz[i] = d; 2021 o_nnz[i] = nz - d - bd; 2022 nz = nz - bd; 2023 nz_max = PetscMax(nz_max, nz); 2024 } 2025 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2026 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2027 PetscCall(PetscFree2(d_nnz, o_nnz)); 2028 2029 values = (PetscScalar *)V; 2030 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2031 for (i = 0; i < m; i++) { 2032 PetscInt row = i + rstart; 2033 PetscInt ncols = ii[i + 1] - ii[i]; 2034 const PetscInt *icols = jj + ii[i]; 2035 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2036 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2037 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2038 } else { /* block ordering does not match so we can only insert one block at a time. */ 2039 PetscInt j; 2040 for (j = 0; j < ncols; j++) { 2041 const PetscScalar *svals = values + (V ? 
        (bs * bs * (ii[i] + j)) : 0); /* continuation: per-block value offset when V was supplied */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all inserted entries are locally owned, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);

   Options Database Key:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`

   Level: beginner

   Note:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
   diagonal portion of the matrix of each process has to be less than or equal the number of columns.

.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

/* Type constructor for MATMPISBAIJ: allocates the Mat_MPISBAIJ context, installs
   the operations table, registers the composed methods used by the generic
   MatXXX() entry points, and marks the matrix as symmetric by construction. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* composed methods reachable via PetscTryMethod()/PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* an SBAIJ matrix is symmetric by construction and stays so */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if !defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_TRUE; /* real symmetric implies Hermitian */
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 would make the hash table too small; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
   and `MATMPISBAIJ` otherwise.

   Options Database Key:
. -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@
  MatMPISBAIJSetPreallocation - For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B - the matrix
. bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
. d_nz - number of block nonzeros per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular and diagonal part of the diagonal portion of the local matrix
          (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
          for the diagonal entry and set a value even if it is zero.
.
o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix that is right of the diagonal
          (possibly different for each block row) or `NULL`.

  Options Database Keys:
+ -mat_no_unroll - uses code that does not unroll the loops in the
                   block calculations (much slower)
- -mat_block_size - size of the blocks to use

  Level: intermediate

  Notes:
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d matrix is stored in
  `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

  Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
  plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix

  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.

.seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation; a no-op for non-MPISBAIJ types */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
  MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
  (block compressed row).
  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz - number of block nonzeros per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the upper triangular portion of the diagonal portion of the local matrix
          (possibly different for each block row) or `NULL`.
          If you plan to factor the matrix you must leave room for the diagonal entry and
          set its value even if it is zero.
. o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_unroll - uses code that does not unroll the loops in the
                   block calculations (much slower)
. -mat_block_size - size of the blocks to use
- -mat_mpi - use the parallel matrix data structures even on one processor
             (defaults to using SeqBAIJ format on one processor)

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  The number of rows and columns must be divisible by blocksize.
  This matrix type does not support complex Hermitian operation.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d matrix is stored in
  `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

  Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
  plus the diagonal part of the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.

.seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* pick the sequential or parallel variant based on the communicator size */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPISBAIJ matrix, copying the layout and (depending on cpvalues)
   the numerical values. The body continues in the next chunk. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (matin->hash_active) {
    /* source is still in hash-assembly mode; nothing to copy beyond the setup */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a      = (Mat_MPISBAIJ *)mat->data;
    a->bs2 = oldmat->bs2;
    a->mbs =
oldmat->mbs; 2431 a->nbs = oldmat->nbs; 2432 a->Mbs = oldmat->Mbs; 2433 a->Nbs = oldmat->Nbs; 2434 2435 a->size = oldmat->size; 2436 a->rank = oldmat->rank; 2437 a->donotstash = oldmat->donotstash; 2438 a->roworiented = oldmat->roworiented; 2439 a->rowindices = NULL; 2440 a->rowvalues = NULL; 2441 a->getrowactive = PETSC_FALSE; 2442 a->barray = NULL; 2443 a->rstartbs = oldmat->rstartbs; 2444 a->rendbs = oldmat->rendbs; 2445 a->cstartbs = oldmat->cstartbs; 2446 a->cendbs = oldmat->cendbs; 2447 2448 /* hash table stuff */ 2449 a->ht = NULL; 2450 a->hd = NULL; 2451 a->ht_size = 0; 2452 a->ht_flag = oldmat->ht_flag; 2453 a->ht_fact = oldmat->ht_fact; 2454 a->ht_total_ct = 0; 2455 a->ht_insert_ct = 0; 2456 2457 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2458 if (oldmat->colmap) { 2459 #if defined(PETSC_USE_CTABLE) 2460 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2461 #else 2462 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2463 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2464 #endif 2465 } else a->colmap = NULL; 2466 2467 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2468 PetscCall(PetscMalloc1(len, &a->garray)); 2469 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2470 } else a->garray = NULL; 2471 2472 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2473 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2474 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2475 2476 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2477 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2478 2479 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2480 PetscCall(VecGetArray(a->slvec1, &array)); 2481 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2482 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2483 PetscCall(VecRestoreArray(a->slvec1, &array)); 2484 
PetscCall(VecGetArray(a->slvec0, &array)); 2485 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2486 PetscCall(VecRestoreArray(a->slvec0, &array)); 2487 2488 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2489 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2490 a->sMvctx = oldmat->sMvctx; 2491 2492 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2493 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2494 } 2495 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2496 *newmat = mat; 2497 PetscFunctionReturn(PETSC_SUCCESS); 2498 } 2499 2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2502 2503 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2504 { 2505 PetscBool isbinary; 2506 2507 PetscFunctionBegin; 2508 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2509 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2510 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2511 PetscFunctionReturn(PETSC_SUCCESS); 2512 } 2513 2514 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2515 { 2516 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2517 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2518 PetscReal atmp; 2519 PetscReal *work, *svalues, *rvalues; 2520 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2521 PetscMPIInt rank, size; 2522 PetscInt *rowners_bs, count, source; 2523 PetscScalar *va; 2524 MatScalar *ba; 2525 MPI_Status stat; 2526 2527 PetscFunctionBegin; 2528 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2529 
PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2530 PetscCall(VecGetArray(v, &va)); 2531 2532 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2533 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2534 2535 bs = A->rmap->bs; 2536 mbs = a->mbs; 2537 Mbs = a->Mbs; 2538 ba = b->a; 2539 bi = b->i; 2540 bj = b->j; 2541 2542 /* find ownerships */ 2543 rowners_bs = A->rmap->range; 2544 2545 /* each proc creates an array to be distributed */ 2546 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2547 2548 /* row_max for B */ 2549 if (rank != size - 1) { 2550 for (i = 0; i < mbs; i++) { 2551 ncols = bi[1] - bi[0]; 2552 bi++; 2553 brow = bs * i; 2554 for (j = 0; j < ncols; j++) { 2555 bcol = bs * (*bj); 2556 for (kcol = 0; kcol < bs; kcol++) { 2557 col = bcol + kcol; /* local col index */ 2558 col += rowners_bs[rank + 1]; /* global col index */ 2559 for (krow = 0; krow < bs; krow++) { 2560 atmp = PetscAbsScalar(*ba); 2561 ba++; 2562 row = brow + krow; /* local row index */ 2563 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2564 if (work[col] < atmp) work[col] = atmp; 2565 } 2566 } 2567 bj++; 2568 } 2569 } 2570 2571 /* send values to its owners */ 2572 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2573 svalues = work + rowners_bs[dest]; 2574 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2575 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2576 } 2577 } 2578 2579 /* receive values */ 2580 if (rank) { 2581 rvalues = work; 2582 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2583 for (source = 0; source < rank; source++) { 2584 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2585 /* process values */ 2586 for (i = 0; i < count; i++) { 2587 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2588 } 2589 } 2590 } 2591 2592 PetscCall(VecRestoreArray(v, &va)); 2593 PetscCall(PetscFree(work)); 2594 
/* (closing lines of MatGetRowMaxAbs_MPISBAIJ) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SOR/SSOR relaxation for MPISBAIJ. Only block size 1 is supported, and only the
   sweep types handled by the branches below; anything else errors out.
   The symmetric-storage trick: slvec0 = [owned part | ghost part] carries -x and
   -B^T x, scattered with ADD_VALUES into slvec1 = [b | 0] so that slvec1a ends up
   holding the lower-triangular correction needed by the local sweeps. */
static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar *x, *ptr, *from;
  Vec bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the sequential diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with zero guess needs no off-process correction */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a (slvec0a aliases the first bs*mbs entries of slvec0) */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      /* negate BOTH halves: slvec0 = [-x | -B^T x]  (B^T x was computed positive above) */
      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 (state bump because slvec1b aliases slvec1's storage) */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      /* accumulate: slvec1 = [b | 0] + scattered(-x, -B^T x) */
      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: combine backward sweep, diagonal scaling, off-diagonal
       multiply and forward sweep; work vectors xx1/bb1 are cached on the matrix */
    Vec xx1;
    PetscBool hasop;
    const PetscScalar *diag;
    PetscScalar *sl, scale = (omega - 2.0) / omega;
    PetscInt i, n;

    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      /* NOTE(review): result goes to bb1 but the AYPX below reads slvec1a — verify
         against MatMultDiagonalBlock's contract for this type before relying on it */
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so fuse into a subtraction */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format
for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - the block size, only a block size of 1 is supported
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
      calculated if `N` is given) For square matrices `n` is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
. j - column indices
- a - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` (and `a`) arrays.
2758 2759 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2760 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2761 @*/ 2762 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2763 { 2764 PetscFunctionBegin; 2765 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2766 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2767 PetscCall(MatCreate(comm, mat)); 2768 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2769 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2770 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2771 PetscFunctionReturn(PETSC_SUCCESS); 2772 } 2773 2774 /*@ 2775 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2776 2777 Collective 2778 2779 Input Parameters: 2780 + B - the matrix 2781 . bs - the block size 2782 . i - the indices into `j` for the start of each local row (indices start with zero) 2783 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2784 - v - optional values in the matrix, pass `NULL` if not provided 2785 2786 Level: advanced 2787 2788 Notes: 2789 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2790 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2791 called this routine. 
2792 2793 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2794 and usually the numerical values as well 2795 2796 Any entries passed in that are below the diagonal are ignored 2797 2798 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2799 `MatCreateMPISBAIJWithArrays()` 2800 @*/ 2801 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2802 { 2803 PetscFunctionBegin; 2804 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2805 PetscFunctionReturn(PETSC_SUCCESS); 2806 } 2807 2808 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2809 { 2810 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2811 PetscInt *indx; 2812 PetscScalar *values; 2813 2814 PetscFunctionBegin; 2815 PetscCall(MatGetSize(inmat, &m, &N)); 2816 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2817 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2818 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2819 PetscInt *bindx, rmax = a->rmax, j; 2820 PetscMPIInt rank, size; 2821 2822 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2823 mbs = m / bs; 2824 Nbs = N / cbs; 2825 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2826 nbs = n / cbs; 2827 2828 PetscCall(PetscMalloc1(rmax, &bindx)); 2829 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2830 2831 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2832 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2833 if (rank == size - 1) { 2834 /* Check sum(nbs) = Nbs */ 2835 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" 
PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2836 } 2837 2838 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2839 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2840 for (i = 0; i < mbs; i++) { 2841 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2842 nnz = nnz / bs; 2843 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2844 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2845 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2846 } 2847 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2848 PetscCall(PetscFree(bindx)); 2849 2850 PetscCall(MatCreate(comm, outmat)); 2851 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2852 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2853 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2854 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2855 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2856 MatPreallocateEnd(dnz, onz); 2857 } 2858 2859 /* numeric phase */ 2860 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2861 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2862 2863 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2864 for (i = 0; i < m; i++) { 2865 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2866 Ii = i + rstart; 2867 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2868 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2869 } 2870 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2871 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2872 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2873 PetscFunctionReturn(PETSC_SUCCESS); 2874 } 2875