1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator, 12 and `MATMPISELL` otherwise. As a result, for single process communicators, 13 `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()` 19 20 Level: beginner 21 22 .seealso: `Mat`, `MATAIJ`, `MATBAIJ`, `MATSBAIJ`, `MatCreateSELL()`, `MatCreateSeqSELL()`, `MATSEQSELL`, `MATMPISELL` 23 M*/ 24 25 static PetscErrorCode MatDiagonalSet_MPISELL(Mat Y, Vec D, InsertMode is) 26 { 27 Mat_MPISELL *sell = (Mat_MPISELL *)Y->data; 28 29 PetscFunctionBegin; 30 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 31 PetscCall(MatDiagonalSet(sell->A, D, is)); 32 } else { 33 PetscCall(MatDiagonalSet_Default(Y, D, is)); 34 } 35 PetscFunctionReturn(PETSC_SUCCESS); 36 } 37 38 /* 39 Local utility routine that creates a mapping from the global column 40 number to the local number in the off-diagonal part of the local 41 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 42 a slightly higher hash table cost; without it it is not scalable (each processor 43 has an order N integer array but is fast to access. 44 */ 45 PetscErrorCode MatCreateColmap_MPISELL_Private(Mat mat) 46 { 47 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 48 PetscInt n = sell->B->cmap->n, i; 49 50 PetscFunctionBegin; 51 PetscCheck(sell->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPISELL Matrix was assembled but is missing garray"); 52 #if defined(PETSC_USE_CTABLE) 53 PetscCall(PetscHMapICreateWithSize(n, &sell->colmap)); 54 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(sell->colmap, sell->garray[i] + 1, i + 1)); 55 #else 56 PetscCall(PetscCalloc1(mat->cmap->N + 1, &sell->colmap)); 57 for (i = 0; i < n; i++) sell->colmap[sell->garray[i]] = i + 1; 58 #endif 59 PetscFunctionReturn(PETSC_SUCCESS); 60 } 61 62 #define MatSetValues_SeqSELL_A_Private(row, col, value, addv, orow, ocol) \ 63 { \ 64 if (col <= lastcol1) low1 = 0; \ 65 else high1 = nrow1; \ 66 lastcol1 = col; \ 67 while (high1 - low1 > 5) { \ 68 t = (low1 + high1) / 2; \ 69 if (cp1[sliceheight * t] > col) high1 = t; \ 70 else low1 = t; \ 71 } \ 72 for (_i = low1; _i < high1; _i++) { \ 73 if (cp1[sliceheight * _i] > col) break; \ 74 if (cp1[sliceheight * _i] == col) { \ 75 if (addv == ADD_VALUES) vp1[sliceheight * _i] += value; \ 76 else vp1[sliceheight * _i] = value; \ 77 inserted = PETSC_TRUE; \ 78 goto a_noinsert; \ 79 } \ 80 } \ 81 if (value == 0.0 && ignorezeroentries) { \ 82 low1 = 0; \ 83 high1 = nrow1; \ 84 goto a_noinsert; \ 85 } \ 86 if (nonew == 1) { \ 87 low1 = 0; \ 88 high1 = nrow1; \ 89 goto a_noinsert; \ 90 } \ 91 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 92 MatSeqXSELLReallocateSELL(A, am, 1, nrow1, a->sliidx, a->sliceheight, row / sliceheight, row, col, a->colidx, a->val, cp1, vp1, nonew, MatScalar); \ 93 /* shift up all the later entries in this row */ \ 94 for (ii = nrow1 - 1; ii >= _i; ii--) { \ 95 cp1[sliceheight * (ii + 1)] = cp1[sliceheight * ii]; \ 96 vp1[sliceheight * (ii + 1)] = vp1[sliceheight * ii]; \ 97 } \ 98 cp1[sliceheight * _i] = col; \ 99 vp1[sliceheight * _i] = value; \ 100 a->nz++; \ 101 nrow1++; \ 102 a_noinsert:; \ 103 a->rlen[row] = nrow1; \ 104 } 105 106 #define MatSetValues_SeqSELL_B_Private(row, col, value, addv, orow, ocol) \ 107 { \ 108 if (col <= lastcol2) low2 = 0; \ 109 else high2 = nrow2; \ 110 lastcol2 = col; \ 111 while (high2 - low2 > 5) { \ 112 t = (low2 + high2) / 2; \ 113 if (cp2[sliceheight * t] > col) high2 = t; \ 114 else low2 = t; \ 115 } \ 116 for (_i = low2; _i < high2; _i++) { \ 117 if (cp2[sliceheight * _i] > col) break; \ 118 if (cp2[sliceheight * _i] == col) { \ 119 if (addv == ADD_VALUES) vp2[sliceheight * _i] += value; \ 120 else vp2[sliceheight * _i] = value; \ 121 inserted = PETSC_TRUE; \ 122 goto b_noinsert; \ 123 } \ 124 } \ 125 if (value == 0.0 && ignorezeroentries) { \ 126 low2 = 0; \ 127 high2 = nrow2; \ 128 goto b_noinsert; \ 129 } \ 130 if (nonew == 1) { \ 131 low2 = 0; \ 132 high2 = nrow2; \ 133 goto b_noinsert; \ 134 } \ 135 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 136 MatSeqXSELLReallocateSELL(B, bm, 1, nrow2, b->sliidx, b->sliceheight, row / sliceheight, row, col, b->colidx, b->val, cp2, vp2, nonew, MatScalar); \ 137 /* shift up all the later entries in this row */ \ 138 for (ii = nrow2 - 1; ii >= _i; ii--) { \ 139 cp2[sliceheight * (ii + 1)] = cp2[sliceheight * ii]; \ 140 vp2[sliceheight * (ii + 1)] = vp2[sliceheight * ii]; \ 141 } \ 142 cp2[sliceheight * _i] = col; \ 143 vp2[sliceheight * _i] = value; \ 144 b->nz++; \ 145 nrow2++; \ 146 b_noinsert:; \ 147 b->rlen[row] = nrow2; \ 148 } 149 150 static PetscErrorCode MatSetValues_MPISELL(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 151 { 152 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 153 PetscScalar value; 154 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, shift1, shift2; 155 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 156 PetscBool roworiented = sell->roworiented; 157 158 /* Some Variables required in the macro */ 159 Mat A = sell->A; 160 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 161 PetscBool ignorezeroentries = a->ignorezeroentries, found; 162 Mat B = sell->B; 163 Mat_SeqSELL *b = (Mat_SeqSELL *)B->data; 164 PetscInt *cp1, *cp2, ii, _i, nrow1, nrow2, low1, high1, low2, high2, t, lastcol1, lastcol2, sliceheight = a->sliceheight; 165 MatScalar *vp1, *vp2; 166 167 PetscFunctionBegin; 168 for (i = 0; i < m; i++) { 169 if (im[i] < 0) continue; 170 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 171 if (im[i] >= rstart && im[i] < rend) { 172 row = im[i] - rstart; 173 lastcol1 = -1; 174 shift1 = a->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */ 175 cp1 = PetscSafePointerPlusOffset(a->colidx, shift1); 176 vp1 = PetscSafePointerPlusOffset(a->val, shift1); 177 nrow1 = a->rlen[row]; 178 low1 = 0; 179 high1 = nrow1; 180 lastcol2 = -1; 181 shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */ 182 cp2 = PetscSafePointerPlusOffset(b->colidx, shift2); 183 vp2 = PetscSafePointerPlusOffset(b->val, shift2); 184 nrow2 = b->rlen[row]; 185 low2 = 0; 186 high2 = nrow2; 187 188 for (j = 0; j < n; j++) { 189 if (roworiented) value = v[i * n + j]; 190 else value = v[i + j * m]; 191 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 192 if (in[j] >= cstart && in[j] < cend) { 193 col = in[j] - cstart; 194 MatSetValue_SeqSELL_Private(A, row, col, value, addv, im[i], in[j], cp1, vp1, lastcol1, low1, high1); /* set one value */ 195 #if defined(PETSC_HAVE_CUDA) 196 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) A->offloadmask = PETSC_OFFLOAD_CPU; 197 #endif 198 } else if (in[j] < 0) { 199 continue; 200 } else { 201 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 202 if (mat->was_assembled) { 203 if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat)); 204 #if defined(PETSC_USE_CTABLE) 205 PetscCall(PetscHMapIGetWithDefault(sell->colmap, in[j] + 1, 0, &col)); 206 col--; 207 #else 208 col = sell->colmap[in[j]] - 1; 209 #endif 210 if (col < 0 && !((Mat_SeqSELL *)sell->B->data)->nonew) { 211 PetscCall(MatDisAssemble_MPISELL(mat)); 212 col = in[j]; 213 /* Reinitialize the variables required by MatSetValues_SeqSELL_B_Private() */ 214 B = sell->B; 215 b = (Mat_SeqSELL *)B->data; 216 shift2 = b->sliidx[row / sliceheight] + (row % sliceheight); /* starting index of the row */ 217 cp2 = b->colidx + shift2; 218 vp2 = b->val + shift2; 219 nrow2 = b->rlen[row]; 220 low2 = 0; 221 high2 = nrow2; 222 found = PETSC_FALSE; 223 } else { 224 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 225 } 226 } else col = in[j]; 227 MatSetValue_SeqSELL_Private(B, row, col, value, addv, im[i], in[j], cp2, vp2, lastcol2, low2, high2); /* set one value */ 228 #if defined(PETSC_HAVE_CUDA) 229 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && found) B->offloadmask = PETSC_OFFLOAD_CPU; 230 #endif 231 } 232 } 233 } else { 234 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 235 if (!sell->donotstash) { 236 mat->assembled = PETSC_FALSE; 237 if (roworiented) { 238 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 239 } else { 240 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 241 } 242 } 243 } 244 } 245 PetscFunctionReturn(PETSC_SUCCESS); 246 } 247 248 static PetscErrorCode MatGetValues_MPISELL(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 249 { 250 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 251 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 252 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 253 254 PetscFunctionBegin; 255 for (i = 0; i < m; i++) { 256 if (idxm[i] < 0) continue; /* negative row */ 257 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 258 if (idxm[i] >= rstart && idxm[i] < rend) { 259 row = idxm[i] - rstart; 260 for (j = 0; j < n; j++) { 261 if (idxn[j] < 0) continue; /* negative column */ 262 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 263 if (idxn[j] >= cstart && idxn[j] < cend) { 264 col = idxn[j] - cstart; 265 PetscCall(MatGetValues(sell->A, 1, &row, 1, &col, v + i * n + j)); 266 } else { 267 if (!sell->colmap) PetscCall(MatCreateColmap_MPISELL_Private(mat)); 268 #if defined(PETSC_USE_CTABLE) 269 PetscCall(PetscHMapIGetWithDefault(sell->colmap, idxn[j] + 1, 0, &col)); 270 col--; 271 #else 272 col = sell->colmap[idxn[j]] - 1; 273 #endif 274 if ((col < 0) || (sell->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 275 else PetscCall(MatGetValues(sell->B, 1, &row, 1, &col, v + i * n + j)); 276 } 277 } 278 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 279 } 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 static PetscErrorCode MatAssemblyBegin_MPISELL(Mat mat, MatAssemblyType mode) 284 { 285 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 286 PetscInt nstash, reallocs; 287 288 PetscFunctionBegin; 289 if (sell->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 290 291 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 292 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 293 PetscCall(PetscInfo(sell->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 PetscErrorCode MatAssemblyEnd_MPISELL(Mat mat, MatAssemblyType mode) 298 { 299 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 300 PetscMPIInt n; 301 PetscInt i, flg; 302 PetscInt *row, *col; 303 PetscScalar *val; 304 PetscBool other_disassembled; 305 /* do not use 'b = (Mat_SeqSELL*)sell->B->data' as B can be reset in disassembly */ 306 PetscFunctionBegin; 307 if (!sell->donotstash && !mat->nooffprocentries) { 308 while (1) { 309 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 310 if (!flg) break; 311 312 for (i = 0; i < n; i++) { /* assemble one by one */ 313 PetscCall(MatSetValues_MPISELL(mat, 1, row + i, 1, col + i, val + i, mat->insertmode)); 314 } 315 } 316 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 317 } 318 #if defined(PETSC_HAVE_CUDA) 319 if (mat->offloadmask == PETSC_OFFLOAD_CPU) sell->A->offloadmask = PETSC_OFFLOAD_CPU; 320 #endif 321 PetscCall(MatAssemblyBegin(sell->A, mode)); 322 PetscCall(MatAssemblyEnd(sell->A, mode)); 323 324 /* 325 determine if any processor has disassembled, if so we must 326 also disassemble ourselves, in order that we may reassemble. 327 */ 328 /* 329 if nonzero structure of submatrix B cannot change then we know that 330 no processor disassembled thus we can skip this stuff 331 */ 332 if (!((Mat_SeqSELL *)sell->B->data)->nonew) { 333 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 334 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISELL(mat)); 335 } 336 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISELL(mat)); 337 #if defined(PETSC_HAVE_CUDA) 338 if (mat->offloadmask == PETSC_OFFLOAD_CPU && sell->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) sell->B->offloadmask = PETSC_OFFLOAD_CPU; 339 #endif 340 PetscCall(MatAssemblyBegin(sell->B, mode)); 341 PetscCall(MatAssemblyEnd(sell->B, mode)); 342 PetscCall(PetscFree2(sell->rowvalues, sell->rowindices)); 343 sell->rowvalues = NULL; 344 PetscCall(VecDestroy(&sell->diag)); 345 346 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 347 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSELL *)sell->A->data)->nonew) { 348 PetscObjectState state = sell->A->nonzerostate + sell->B->nonzerostate; 349 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 350 } 351 #if defined(PETSC_HAVE_CUDA) 352 mat->offloadmask = PETSC_OFFLOAD_BOTH; 353 #endif 354 PetscFunctionReturn(PETSC_SUCCESS); 355 } 356 357 static PetscErrorCode MatZeroEntries_MPISELL(Mat A) 358 { 359 Mat_MPISELL *l = (Mat_MPISELL *)A->data; 360 361 PetscFunctionBegin; 362 PetscCall(MatZeroEntries(l->A)); 363 PetscCall(MatZeroEntries(l->B)); 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 static PetscErrorCode MatMult_MPISELL(Mat A, Vec xx, Vec yy) 368 { 369 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 370 PetscInt nt; 371 372 PetscFunctionBegin; 373 PetscCall(VecGetLocalSize(xx, &nt)); 374 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 375 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 376 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 377 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 378 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode MatMultDiagonalBlock_MPISELL(Mat A, Vec bb, Vec xx) 383 { 384 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 385 386 PetscFunctionBegin; 387 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode MatMultAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz) 392 { 393 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 394 395 PetscFunctionBegin; 396 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 397 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 398 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 399 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static PetscErrorCode MatMultTranspose_MPISELL(Mat A, Vec xx, Vec yy) 404 { 405 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 406 407 PetscFunctionBegin; 408 /* do nondiagonal part */ 409 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 410 /* do local part */ 411 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 412 /* add partial results together */ 413 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 414 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static PetscErrorCode MatIsTranspose_MPISELL(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 419 { 420 MPI_Comm comm; 421 Mat_MPISELL *Asell = (Mat_MPISELL *)Amat->data, *Bsell; 422 Mat Adia = Asell->A, Bdia, Aoff, Boff, *Aoffs, *Boffs; 423 IS Me, Notme; 424 PetscInt M, N, first, last, *notme, i; 425 PetscMPIInt size; 426 427 PetscFunctionBegin; 428 /* Easy test: symmetric diagonal block */ 429 Bsell = (Mat_MPISELL *)Bmat->data; 430 Bdia = Bsell->A; 431 PetscCall(MatIsTranspose(Adia, Bdia, tol, f)); 432 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 433 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 434 PetscCallMPI(MPI_Comm_size(comm, &size)); 435 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 436 437 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 438 PetscCall(MatGetSize(Amat, &M, &N)); 439 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 440 PetscCall(PetscMalloc1(N - last + first, ¬me)); 441 for (i = 0; i < first; i++) notme[i] = i; 442 for (i = last; i < M; i++) notme[i - last + first] = i; 443 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 444 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 445 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 446 Aoff = Aoffs[0]; 447 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 448 Boff = Boffs[0]; 449 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 450 PetscCall(MatDestroyMatrices(1, &Aoffs)); 451 PetscCall(MatDestroyMatrices(1, &Boffs)); 452 PetscCall(ISDestroy(&Me)); 453 PetscCall(ISDestroy(&Notme)); 454 PetscCall(PetscFree(notme)); 455 PetscFunctionReturn(PETSC_SUCCESS); 456 } 457 458 static PetscErrorCode MatMultTransposeAdd_MPISELL(Mat A, Vec xx, Vec yy, Vec zz) 459 { 460 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 461 462 PetscFunctionBegin; 463 /* do nondiagonal part */ 464 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 465 /* do local part */ 466 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 467 /* add partial results together */ 468 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 469 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 470 PetscFunctionReturn(PETSC_SUCCESS); 471 } 472 473 /* 474 This only works correctly for square matrices where the subblock A->A is the 475 diagonal block 476 */ 477 static PetscErrorCode MatGetDiagonal_MPISELL(Mat A, Vec v) 478 { 479 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 480 481 PetscFunctionBegin; 482 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 483 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 484 PetscCall(MatGetDiagonal(a->A, v)); 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 static PetscErrorCode MatScale_MPISELL(Mat A, PetscScalar aa) 489 { 490 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 491 492 PetscFunctionBegin; 493 PetscCall(MatScale(a->A, aa)); 494 PetscCall(MatScale(a->B, aa)); 495 PetscFunctionReturn(PETSC_SUCCESS); 496 } 497 498 PetscErrorCode MatDestroy_MPISELL(Mat mat) 499 { 500 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 501 502 PetscFunctionBegin; 503 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 504 PetscCall(MatStashDestroy_Private(&mat->stash)); 505 PetscCall(VecDestroy(&sell->diag)); 506 PetscCall(MatDestroy(&sell->A)); 507 PetscCall(MatDestroy(&sell->B)); 508 #if defined(PETSC_USE_CTABLE) 509 PetscCall(PetscHMapIDestroy(&sell->colmap)); 510 #else 511 PetscCall(PetscFree(sell->colmap)); 512 #endif 513 PetscCall(PetscFree(sell->garray)); 514 PetscCall(VecDestroy(&sell->lvec)); 515 PetscCall(VecScatterDestroy(&sell->Mvctx)); 516 PetscCall(PetscFree2(sell->rowvalues, sell->rowindices)); 517 PetscCall(PetscFree(sell->ld)); 518 PetscCall(PetscFree(mat->data)); 519 520 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 521 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 522 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 523 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 524 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISELLSetPreallocation_C", NULL)); 525 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpiaij_C", NULL)); 526 #if defined(PETSC_HAVE_CUDA) 527 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisell_mpisellcuda_C", NULL)); 528 #endif 529 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 530 PetscFunctionReturn(PETSC_SUCCESS); 531 } 532 533 #include <petscdraw.h> 534 static PetscErrorCode MatView_MPISELL_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 535 { 536 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 537 PetscMPIInt rank = sell->rank, size = sell->size; 538 PetscBool isdraw, iascii, isbinary; 539 PetscViewer sviewer; 540 PetscViewerFormat format; 541 542 PetscFunctionBegin; 543 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 544 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 545 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 546 if (iascii) { 547 PetscCall(PetscViewerGetFormat(viewer, &format)); 548 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 549 MatInfo info; 550 PetscInt *inodes; 551 552 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 553 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 554 PetscCall(MatInodeGetInodeSizes(sell->A, NULL, &inodes, NULL)); 555 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 556 if (!inodes) { 557 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, 558 (PetscInt)info.nz_allocated, (PetscInt)info.memory)); 559 } else { 560 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT ", using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, 561 (PetscInt)info.nz_allocated, (PetscInt)info.memory)); 562 } 563 PetscCall(MatGetInfo(sell->A, MAT_LOCAL, &info)); 564 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 565 PetscCall(MatGetInfo(sell->B, MAT_LOCAL, &info)); 566 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 567 PetscCall(PetscViewerFlush(viewer)); 568 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 569 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 570 PetscCall(VecScatterView(sell->Mvctx, viewer)); 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } else if (format == PETSC_VIEWER_ASCII_INFO) { 573 PetscInt inodecount, inodelimit, *inodes; 574 PetscCall(MatInodeGetInodeSizes(sell->A, &inodecount, &inodes, &inodelimit)); 575 if (inodes) { 576 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 577 } else { 578 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 579 } 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 582 PetscFunctionReturn(PETSC_SUCCESS); 583 } 584 } else if (isbinary) { 585 if (size == 1) { 586 PetscCall(PetscObjectSetName((PetscObject)sell->A, ((PetscObject)mat)->name)); 587 PetscCall(MatView(sell->A, viewer)); 588 } else { 589 /* PetscCall(MatView_MPISELL_Binary(mat,viewer)); */ 590 } 591 PetscFunctionReturn(PETSC_SUCCESS); 592 } else if (isdraw) { 593 PetscDraw draw; 594 PetscBool isnull; 595 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 596 PetscCall(PetscDrawIsNull(draw, &isnull)); 597 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 598 } 599 600 { 601 /* assemble the entire matrix onto first processor. */ 602 Mat A; 603 Mat_SeqSELL *Aloc; 604 PetscInt M = mat->rmap->N, N = mat->cmap->N, *acolidx, row, col, i, j; 605 MatScalar *aval; 606 PetscBool isnonzero; 607 608 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 609 if (rank == 0) { 610 PetscCall(MatSetSizes(A, M, N, M, N)); 611 } else { 612 PetscCall(MatSetSizes(A, 0, 0, M, N)); 613 } 614 /* This is just a temporary matrix, so explicitly using MATMPISELL is probably best */ 615 PetscCall(MatSetType(A, MATMPISELL)); 616 PetscCall(MatMPISELLSetPreallocation(A, 0, NULL, 0, NULL)); 617 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 618 619 /* copy over the A part */ 620 Aloc = (Mat_SeqSELL *)sell->A->data; 621 acolidx = Aloc->colidx; 622 aval = Aloc->val; 623 for (i = 0; i < Aloc->totalslices; i++) { /* loop over slices */ 624 for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) { 625 isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]); 626 if (isnonzero) { /* check the mask bit */ 627 row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart; 628 col = *acolidx + mat->rmap->rstart; 629 PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES)); 630 } 631 aval++; 632 acolidx++; 633 } 634 } 635 636 /* copy over the B part */ 637 Aloc = (Mat_SeqSELL *)sell->B->data; 638 acolidx = Aloc->colidx; 639 aval = Aloc->val; 640 for (i = 0; i < Aloc->totalslices; i++) { 641 for (j = Aloc->sliidx[i]; j < Aloc->sliidx[i + 1]; j++) { 642 isnonzero = (PetscBool)((j - Aloc->sliidx[i]) / Aloc->sliceheight < Aloc->rlen[i * Aloc->sliceheight + j % Aloc->sliceheight]); 643 if (isnonzero) { 644 row = i * Aloc->sliceheight + j % Aloc->sliceheight + mat->rmap->rstart; 645 col = sell->garray[*acolidx]; 646 PetscCall(MatSetValues(A, 1, &row, 1, &col, aval, INSERT_VALUES)); 647 } 648 aval++; 649 acolidx++; 650 } 651 } 652 653 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 654 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 655 /* 656 Everyone has to call to draw the matrix since the graphics waits are 657 synchronized across all processors that share the PetscDraw object 658 */ 659 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 660 if (rank == 0) { 661 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISELL *)A->data)->A, ((PetscObject)mat)->name)); 662 PetscCall(MatView_SeqSELL(((Mat_MPISELL *)A->data)->A, sviewer)); 663 } 664 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 665 PetscCall(MatDestroy(&A)); 666 } 667 PetscFunctionReturn(PETSC_SUCCESS); 668 } 669 670 static PetscErrorCode MatView_MPISELL(Mat mat, PetscViewer viewer) 671 { 672 PetscBool iascii, isdraw, issocket, isbinary; 673 674 PetscFunctionBegin; 675 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 676 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 677 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 678 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 679 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPISELL_ASCIIorDraworSocket(mat, viewer)); 680 PetscFunctionReturn(PETSC_SUCCESS); 681 } 682 683 static PetscErrorCode MatGetGhosts_MPISELL(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 684 { 685 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 686 687 PetscFunctionBegin; 688 PetscCall(MatGetSize(sell->B, NULL, nghosts)); 689 if (ghosts) *ghosts = sell->garray; 690 PetscFunctionReturn(PETSC_SUCCESS); 691 } 692 693 static PetscErrorCode MatGetInfo_MPISELL(Mat matin, MatInfoType flag, MatInfo *info) 694 { 695 Mat_MPISELL *mat = (Mat_MPISELL *)matin->data; 696 Mat A = mat->A, B = mat->B; 697 PetscLogDouble isend[5], irecv[5]; 698 699 PetscFunctionBegin; 700 info->block_size = 1.0; 701 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 702 703 isend[0] = info->nz_used; 704 isend[1] = info->nz_allocated; 705 isend[2] = info->nz_unneeded; 706 isend[3] = info->memory; 707 isend[4] = info->mallocs; 708 709 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 710 711 isend[0] += info->nz_used; 712 isend[1] += info->nz_allocated; 713 isend[2] += info->nz_unneeded; 714 isend[3] += info->memory; 715 isend[4] += info->mallocs; 716 if (flag == MAT_LOCAL) { 717 info->nz_used = isend[0]; 718 info->nz_allocated = isend[1]; 719 info->nz_unneeded = isend[2]; 720 info->memory = isend[3]; 721 info->mallocs = isend[4]; 722 } else if (flag == MAT_GLOBAL_MAX) { 723 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 724 725 info->nz_used = irecv[0]; 726 info->nz_allocated = irecv[1]; 727 info->nz_unneeded = irecv[2]; 728 info->memory = irecv[3]; 729 info->mallocs = irecv[4]; 730 } else if (flag == MAT_GLOBAL_SUM) { 731 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 732 733 info->nz_used = irecv[0]; 734 info->nz_allocated = irecv[1]; 735 info->nz_unneeded = irecv[2]; 736 info->memory = irecv[3]; 737 info->mallocs = irecv[4]; 738 } 739 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 740 info->fill_ratio_needed = 0; 741 info->factor_mallocs = 0; 742 PetscFunctionReturn(PETSC_SUCCESS); 743 } 744 745 static PetscErrorCode MatSetOption_MPISELL(Mat A, MatOption op, PetscBool flg) 746 { 747 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 748 749 PetscFunctionBegin; 750 switch (op) { 751 case MAT_NEW_NONZERO_LOCATIONS: 752 case MAT_NEW_NONZERO_ALLOCATION_ERR: 753 case MAT_UNUSED_NONZERO_LOCATION_ERR: 754 case MAT_KEEP_NONZERO_PATTERN: 755 case MAT_NEW_NONZERO_LOCATION_ERR: 756 case MAT_USE_INODES: 757 case MAT_IGNORE_ZERO_ENTRIES: 758 MatCheckPreallocated(A, 1); 759 PetscCall(MatSetOption(a->A, op, flg)); 760 PetscCall(MatSetOption(a->B, op, flg)); 761 break; 762 case MAT_ROW_ORIENTED: 763 MatCheckPreallocated(A, 1); 764 a->roworiented = flg; 765 766 PetscCall(MatSetOption(a->A, op, flg)); 767 PetscCall(MatSetOption(a->B, op, flg)); 768 break; 769 case MAT_FORCE_DIAGONAL_ENTRIES: 770 case MAT_SORTED_FULL: 771 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 772 break; 773 case MAT_IGNORE_OFF_PROC_ENTRIES: 774 a->donotstash = flg; 775 break; 776 case MAT_SPD: 777 case MAT_SPD_ETERNAL: 778 break; 779 case MAT_SYMMETRIC: 780 MatCheckPreallocated(A, 1); 781 PetscCall(MatSetOption(a->A, op, flg)); 782 break; 783 case MAT_STRUCTURALLY_SYMMETRIC: 784 MatCheckPreallocated(A, 1); 785 PetscCall(MatSetOption(a->A, op, flg)); 786 break; 787 case MAT_HERMITIAN: 788 MatCheckPreallocated(A, 1); 789 PetscCall(MatSetOption(a->A, op, flg)); 790 break; 791 case MAT_SYMMETRY_ETERNAL: 792 MatCheckPreallocated(A, 1); 793 PetscCall(MatSetOption(a->A, op, flg)); 794 break; 795 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 796 MatCheckPreallocated(A, 1); 797 PetscCall(MatSetOption(a->A, op, flg)); 798 break; 799 default: 800 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 801 } 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 static PetscErrorCode MatDiagonalScale_MPISELL(Mat mat, Vec ll, Vec rr) 806 { 807 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 808 Mat a = sell->A, b = sell->B; 809 PetscInt s1, s2, s3; 810 811 PetscFunctionBegin; 812 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 813 if (rr) { 814 PetscCall(VecGetLocalSize(rr, &s1)); 815 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 816 /* Overlap communication with computation. */ 817 PetscCall(VecScatterBegin(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD)); 818 } 819 if (ll) { 820 PetscCall(VecGetLocalSize(ll, &s1)); 821 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 822 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 823 } 824 /* scale the diagonal block */ 825 PetscUseTypeMethod(a, diagonalscale, ll, rr); 826 827 if (rr) { 828 /* Do a scatter end and then right scale the off-diagonal block */ 829 PetscCall(VecScatterEnd(sell->Mvctx, rr, sell->lvec, INSERT_VALUES, SCATTER_FORWARD)); 830 PetscUseTypeMethod(b, diagonalscale, NULL, sell->lvec); 831 } 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 static PetscErrorCode MatSetUnfactored_MPISELL(Mat A) 836 { 837 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 838 839 PetscFunctionBegin; 840 PetscCall(MatSetUnfactored(a->A)); 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatEqual_MPISELL(Mat A, Mat B, PetscBool *flag) 845 { 846 Mat_MPISELL *matB = (Mat_MPISELL *)B->data, *matA = (Mat_MPISELL *)A->data; 847 Mat a, b, c, d; 848 PetscBool flg; 849 850 PetscFunctionBegin; 851 a = matA->A; 852 b = matA->B; 853 c = matB->A; 854 d = matB->B; 855 856 PetscCall(MatEqual(a, c, &flg)); 857 if (flg) PetscCall(MatEqual(b, d, &flg)); 858 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 static PetscErrorCode MatCopy_MPISELL(Mat A, Mat B, MatStructure str) 863 { 864 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 865 Mat_MPISELL *b = (Mat_MPISELL *)B->data; 866 867 PetscFunctionBegin; 868 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 869 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 870 /* because of the column compression in the off-processor part of the matrix a->B, 871 the number of columns in a->B and b->B may be different, hence we cannot call 872 the MatCopy() directly on the two parts. If need be, we can provide a more 873 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 874 then copying the submatrices */ 875 PetscCall(MatCopy_Basic(A, B, str)); 876 } else { 877 PetscCall(MatCopy(a->A, b->A, str)); 878 PetscCall(MatCopy(a->B, b->B, str)); 879 } 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatSetUp_MPISELL(Mat A) 884 { 885 PetscFunctionBegin; 886 PetscCall(MatMPISELLSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 887 PetscFunctionReturn(PETSC_SUCCESS); 888 } 889 890 static PetscErrorCode MatConjugate_MPISELL(Mat mat) 891 { 892 PetscFunctionBegin; 893 if (PetscDefined(USE_COMPLEX)) { 894 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 895 896 PetscCall(MatConjugate_SeqSELL(sell->A)); 897 PetscCall(MatConjugate_SeqSELL(sell->B)); 898 } 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 static PetscErrorCode MatRealPart_MPISELL(Mat A) 903 { 904 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 905 906 PetscFunctionBegin; 907 PetscCall(MatRealPart(a->A)); 908 PetscCall(MatRealPart(a->B)); 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 static PetscErrorCode MatImaginaryPart_MPISELL(Mat A) 913 { 914 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 915 916 PetscFunctionBegin; 917 PetscCall(MatImaginaryPart(a->A)); 918 PetscCall(MatImaginaryPart(a->B)); 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } 921 922 static PetscErrorCode MatInvertBlockDiagonal_MPISELL(Mat A, const PetscScalar **values) 923 { 924 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 925 926 PetscFunctionBegin; 927 PetscCall(MatInvertBlockDiagonal(a->A, values)); 928 A->factorerrortype = a->A->factorerrortype; 929 PetscFunctionReturn(PETSC_SUCCESS); 930 } 931 932 static PetscErrorCode MatSetRandom_MPISELL(Mat x, PetscRandom rctx) 933 { 934 Mat_MPISELL *sell = (Mat_MPISELL *)x->data; 935 936 PetscFunctionBegin; 937 PetscCall(MatSetRandom(sell->A, rctx)); 938 PetscCall(MatSetRandom(sell->B, rctx)); 939 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 940 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 941 PetscFunctionReturn(PETSC_SUCCESS); 942 } 943 944 static PetscErrorCode MatSetFromOptions_MPISELL(Mat A, PetscOptionItems *PetscOptionsObject) 945 { 946 PetscFunctionBegin; 947 PetscOptionsHeadBegin(PetscOptionsObject, "MPISELL options"); 948 PetscOptionsHeadEnd(); 949 PetscFunctionReturn(PETSC_SUCCESS); 950 } 951 952 static PetscErrorCode MatShift_MPISELL(Mat Y, PetscScalar a) 953 { 954 Mat_MPISELL *msell = (Mat_MPISELL *)Y->data; 955 Mat_SeqSELL *sell = (Mat_SeqSELL *)msell->A->data; 956 957 PetscFunctionBegin; 958 if (!Y->preallocated) { 959 PetscCall(MatMPISELLSetPreallocation(Y, 1, NULL, 0, NULL)); 960 } else if (!sell->nz) { 961 PetscInt nonew = sell->nonew; 962 PetscCall(MatSeqSELLSetPreallocation(msell->A, 1, NULL)); 963 sell->nonew = nonew; 964 } 965 PetscCall(MatShift_Basic(Y, a)); 966 PetscFunctionReturn(PETSC_SUCCESS); 967 } 968 969 static PetscErrorCode MatMissingDiagonal_MPISELL(Mat A, PetscBool *missing, PetscInt *d) 970 { 971 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 972 973 PetscFunctionBegin; 974 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 975 PetscCall(MatMissingDiagonal(a->A, missing, d)); 976 if (d) { 977 PetscInt rstart; 978 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 979 *d += rstart; 980 } 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 static PetscErrorCode MatGetDiagonalBlock_MPISELL(Mat A, Mat *a) 985 { 986 PetscFunctionBegin; 987 *a = ((Mat_MPISELL *)A->data)->A; 988 PetscFunctionReturn(PETSC_SUCCESS); 989 } 990 991 static PetscErrorCode MatStoreValues_MPISELL(Mat mat) 992 { 993 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 994 995 PetscFunctionBegin; 996 PetscCall(MatStoreValues(sell->A)); 997 PetscCall(MatStoreValues(sell->B)); 998 PetscFunctionReturn(PETSC_SUCCESS); 999 } 1000 1001 static PetscErrorCode MatRetrieveValues_MPISELL(Mat mat) 1002 { 1003 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 1004 1005 PetscFunctionBegin; 1006 PetscCall(MatRetrieveValues(sell->A)); 1007 PetscCall(MatRetrieveValues(sell->B)); 1008 PetscFunctionReturn(PETSC_SUCCESS); 1009 } 1010 1011 static PetscErrorCode MatMPISELLSetPreallocation_MPISELL(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[]) 1012 { 1013 Mat_MPISELL *b; 1014 1015 PetscFunctionBegin; 1016 PetscCall(PetscLayoutSetUp(B->rmap)); 1017 PetscCall(PetscLayoutSetUp(B->cmap)); 1018 b = (Mat_MPISELL *)B->data; 1019 1020 if (!B->preallocated) { 1021 /* Explicitly create 2 MATSEQSELL matrices. */ 1022 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1023 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1024 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 1025 PetscCall(MatSetType(b->A, MATSEQSELL)); 1026 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1027 PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N)); 1028 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 1029 PetscCall(MatSetType(b->B, MATSEQSELL)); 1030 } 1031 1032 PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen)); 1033 PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen)); 1034 B->preallocated = PETSC_TRUE; 1035 B->was_assembled = PETSC_FALSE; 1036 /* 1037 critical for MatAssemblyEnd to work. 1038 MatAssemblyBegin checks it to set up was_assembled 1039 and MatAssemblyEnd checks was_assembled to determine whether to build garray 1040 */ 1041 B->assembled = PETSC_FALSE; 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } 1044 1045 static PetscErrorCode MatDuplicate_MPISELL(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 1046 { 1047 Mat mat; 1048 Mat_MPISELL *a, *oldmat = (Mat_MPISELL *)matin->data; 1049 1050 PetscFunctionBegin; 1051 *newmat = NULL; 1052 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 1053 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 1054 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 1055 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 1056 a = (Mat_MPISELL *)mat->data; 1057 1058 mat->factortype = matin->factortype; 1059 mat->assembled = PETSC_TRUE; 1060 mat->insertmode = NOT_SET_VALUES; 1061 mat->preallocated = PETSC_TRUE; 1062 1063 a->size = oldmat->size; 1064 a->rank = oldmat->rank; 1065 a->donotstash = oldmat->donotstash; 1066 a->roworiented = oldmat->roworiented; 1067 a->rowindices = NULL; 1068 a->rowvalues = NULL; 1069 a->getrowactive = PETSC_FALSE; 1070 1071 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 1072 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 1073 1074 if (oldmat->colmap) { 1075 #if defined(PETSC_USE_CTABLE) 1076 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 1077 #else 1078 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 1079 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 1080 #endif 1081 } else a->colmap = NULL; 1082 if (oldmat->garray) { 1083 PetscInt len; 1084 len = oldmat->B->cmap->n; 1085 PetscCall(PetscMalloc1(len + 1, &a->garray)); 1086 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 1087 } else a->garray = NULL; 1088 1089 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 1090 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 1091 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 1092 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 1093 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 1094 *newmat = mat; 1095 PetscFunctionReturn(PETSC_SUCCESS); 1096 } 1097 1098 static const struct _MatOps MatOps_Values = {MatSetValues_MPISELL, 1099 NULL, 1100 NULL, 1101 MatMult_MPISELL, 1102 /* 4*/ MatMultAdd_MPISELL, 1103 MatMultTranspose_MPISELL, 1104 MatMultTransposeAdd_MPISELL, 1105 NULL, 1106 NULL, 1107 NULL, 1108 /*10*/ NULL, 1109 NULL, 1110 NULL, 1111 MatSOR_MPISELL, 1112 NULL, 1113 /*15*/ MatGetInfo_MPISELL, 1114 MatEqual_MPISELL, 1115 MatGetDiagonal_MPISELL, 1116 MatDiagonalScale_MPISELL, 1117 NULL, 1118 /*20*/ MatAssemblyBegin_MPISELL, 1119 MatAssemblyEnd_MPISELL, 1120 MatSetOption_MPISELL, 1121 MatZeroEntries_MPISELL, 1122 /*24*/ NULL, 1123 NULL, 1124 NULL, 1125 NULL, 1126 NULL, 1127 /*29*/ MatSetUp_MPISELL, 1128 NULL, 1129 NULL, 1130 MatGetDiagonalBlock_MPISELL, 1131 NULL, 1132 /*34*/ MatDuplicate_MPISELL, 1133 NULL, 1134 NULL, 1135 NULL, 1136 NULL, 1137 /*39*/ NULL, 1138 NULL, 1139 NULL, 1140 MatGetValues_MPISELL, 1141 MatCopy_MPISELL, 1142 /*44*/ NULL, 1143 MatScale_MPISELL, 1144 MatShift_MPISELL, 1145 MatDiagonalSet_MPISELL, 1146 NULL, 1147 /*49*/ MatSetRandom_MPISELL, 1148 NULL, 1149 NULL, 1150 NULL, 1151 NULL, 1152 /*54*/ MatFDColoringCreate_MPIXAIJ, 1153 NULL, 1154 MatSetUnfactored_MPISELL, 1155 NULL, 1156 NULL, 1157 /*59*/ NULL, 1158 MatDestroy_MPISELL, 1159 MatView_MPISELL, 1160 NULL, 1161 NULL, 1162 /*64*/ NULL, 1163 NULL, 1164 NULL, 1165 NULL, 1166 NULL, 1167 /*69*/ NULL, 1168 NULL, 1169 NULL, 1170 NULL, 1171 NULL, 1172 NULL, 1173 /*75*/ MatFDColoringApply_AIJ, /* reuse AIJ function */ 1174 MatSetFromOptions_MPISELL, 1175 NULL, 1176 NULL, 1177 NULL, 1178 /*80*/ NULL, 1179 NULL, 1180 NULL, 1181 /*83*/ NULL, 1182 NULL, 1183 NULL, 1184 NULL, 1185 NULL, 1186 NULL, 1187 /*89*/ NULL, 1188 NULL, 1189 NULL, 1190 NULL, 1191 NULL, 1192 /*94*/ NULL, 1193 NULL, 1194 NULL, 1195 NULL, 1196 NULL, 1197 /*99*/ NULL, 1198 NULL, 1199 NULL, 1200 MatConjugate_MPISELL, 1201 NULL, 1202 /*104*/ NULL, 1203 MatRealPart_MPISELL, 1204 MatImaginaryPart_MPISELL, 1205 NULL, 1206 NULL, 1207 /*109*/ NULL, 1208 NULL, 1209 NULL, 1210 NULL, 1211 MatMissingDiagonal_MPISELL, 1212 /*114*/ NULL, 1213 NULL, 1214 MatGetGhosts_MPISELL, 1215 NULL, 1216 NULL, 1217 /*119*/ MatMultDiagonalBlock_MPISELL, 1218 NULL, 1219 NULL, 1220 NULL, 1221 NULL, 1222 /*124*/ NULL, 1223 NULL, 1224 MatInvertBlockDiagonal_MPISELL, 1225 NULL, 1226 NULL, 1227 /*129*/ NULL, 1228 NULL, 1229 NULL, 1230 NULL, 1231 NULL, 1232 /*134*/ NULL, 1233 NULL, 1234 NULL, 1235 NULL, 1236 NULL, 1237 /*139*/ NULL, 1238 NULL, 1239 NULL, 1240 MatFDColoringSetUp_MPIXAIJ, 1241 NULL, 1242 /*144*/ NULL, 1243 NULL, 1244 NULL, 1245 NULL, 1246 NULL, 1247 NULL, 1248 /*150*/ NULL, 1249 NULL, 1250 NULL, 1251 NULL, 1252 NULL, 1253 /*155*/ NULL, 1254 NULL}; 1255 1256 /*@C 1257 MatMPISELLSetPreallocation - Preallocates memory for a `MATMPISELL` sparse parallel matrix in sell format. 1258 For good matrix assembly performance the user should preallocate the matrix storage by 1259 setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 1260 1261 Collective 1262 1263 Input Parameters: 1264 + B - the matrix 1265 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1266 (same value is used for all local rows) 1267 . d_nnz - array containing the number of nonzeros in the various rows of the 1268 DIAGONAL portion of the local submatrix (possibly different for each row) 1269 or NULL (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 1270 The size of this array is equal to the number of local rows, i.e 'm'. 1271 For matrices that will be factored, you must leave room for (and set) 1272 the diagonal entry even if it is zero. 1273 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1274 submatrix (same value is used for all local rows). 1275 - o_nnz - array containing the number of nonzeros in the various rows of the 1276 OFF-DIAGONAL portion of the local submatrix (possibly different for 1277 each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 1278 structure. The size of this array is equal to the number 1279 of local rows, i.e 'm'. 1280 1281 Example usage: 1282 Consider the following 8x8 matrix with 34 non-zero values, that is 1283 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1284 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1285 as follows 1286 1287 .vb 1288 1 2 0 | 0 3 0 | 0 4 1289 Proc0 0 5 6 | 7 0 0 | 8 0 1290 9 0 10 | 11 0 0 | 12 0 1291 ------------------------------------- 1292 13 0 14 | 15 16 17 | 0 0 1293 Proc1 0 18 0 | 19 20 21 | 0 0 1294 0 0 0 | 22 23 0 | 24 0 1295 ------------------------------------- 1296 Proc2 25 26 27 | 0 0 28 | 29 0 1297 30 0 0 | 31 32 33 | 0 34 1298 .ve 1299 1300 This can be represented as a collection of submatrices as 1301 1302 .vb 1303 A B C 1304 D E F 1305 G H I 1306 .ve 1307 1308 Where the submatrices A,B,C are owned by proc0, D,E,F are 1309 owned by proc1, G,H,I are owned by proc2. 1310 1311 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1312 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1313 The 'M','N' parameters are 8,8, and have the same values on all procs. 1314 1315 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1316 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1317 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1318 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1319 part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL` 1320 matrix, and [DF] as another SeqSELL matrix. 1321 1322 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 1323 allocated for every row of the local DIAGONAL submatrix, and o_nz 1324 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 1325 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 1326 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1327 In this case, the values of d_nz,o_nz are 1328 .vb 1329 proc0 dnz = 2, o_nz = 2 1330 proc1 dnz = 3, o_nz = 2 1331 proc2 dnz = 1, o_nz = 4 1332 .ve 1333 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1334 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1335 for proc3. i.e we are using 12+15+10=37 storage locations to store 1336 34 values. 1337 1338 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 1339 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1340 In the above case the values for d_nnz,o_nnz are 1341 .vb 1342 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 1343 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 1344 proc2 d_nnz = [1,1] and o_nnz = [4,4] 1345 .ve 1346 Here the space allocated is according to nz (or maximum values in the nnz 1347 if nnz is provided) for DIAGONAL and OFF-DIAGONAL submatrices, i.e (2+2+3+2)*3+(1+4)*2=37 1348 1349 Level: intermediate 1350 1351 Notes: 1352 If the *_nnz parameter is given then the *_nz parameter is ignored 1353 1354 The stored row and column indices begin with zero. 1355 1356 The parallel matrix is partitioned such that the first m0 rows belong to 1357 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1358 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1359 1360 The DIAGONAL portion of the local submatrix of a processor can be defined 1361 as the submatrix which is obtained by extraction the part corresponding to 1362 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 1363 first row that belongs to the processor, r2 is the last row belonging to 1364 the this processor, and c1-c2 is range of indices of the local part of a 1365 vector suitable for applying the matrix to. This is an mxn matrix. In the 1366 common case of a square matrix, the row and column ranges are the same and 1367 the DIAGONAL part is also square. The remaining portion of the local 1368 submatrix (mxN) constitute the OFF-DIAGONAL portion. 1369 1370 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 1371 1372 You can call `MatGetInfo()` to get information on how effective the preallocation was; 1373 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1374 You can also run with the option -info and look for messages with the string 1375 malloc in them to see if additional memory allocation was needed. 1376 1377 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatCreateSELL()`, 1378 `MATMPISELL`, `MatGetInfo()`, `PetscSplitOwnership()`, `MATSELL` 1379 @*/ 1380 PetscErrorCode MatMPISELLSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 1381 { 1382 PetscFunctionBegin; 1383 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 1384 PetscValidType(B, 1); 1385 PetscTryMethod(B, "MatMPISELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 /*MC 1390 MATMPISELL - MATMPISELL = "mpisell" - A matrix type to be used for MPI sparse matrices, 1391 based on the sliced Ellpack format 1392 1393 Options Database Key: 1394 . -mat_type sell - sets the matrix type to `MATSELL` during a call to `MatSetFromOptions()` 1395 1396 Level: beginner 1397 1398 .seealso: `Mat`, `MatCreateSELL()`, `MATSEQSELL`, `MATSELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ` 1399 M*/ 1400 1401 /*@C 1402 MatCreateSELL - Creates a sparse parallel matrix in `MATSELL` format. 1403 1404 Collective 1405 1406 Input Parameters: 1407 + comm - MPI communicator 1408 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 1409 This value should be the same as the local size used in creating the 1410 y vector for the matrix-vector product y = Ax. 1411 . n - This value should be the same as the local size used in creating the 1412 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 1413 calculated if `N` is given) For square matrices n is almost always `m`. 1414 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 1415 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 1416 . d_rlenmax - max number of nonzeros per row in DIAGONAL portion of local submatrix 1417 (same value is used for all local rows) 1418 . d_rlen - array containing the number of nonzeros in the various rows of the 1419 DIAGONAL portion of the local submatrix (possibly different for each row) 1420 or `NULL`, if d_rlenmax is used to specify the nonzero structure. 1421 The size of this array is equal to the number of local rows, i.e `m`. 1422 . o_rlenmax - max number of nonzeros per row in the OFF-DIAGONAL portion of local 1423 submatrix (same value is used for all local rows). 1424 - o_rlen - array containing the number of nonzeros in the various rows of the 1425 OFF-DIAGONAL portion of the local submatrix (possibly different for 1426 each row) or `NULL`, if `o_rlenmax` is used to specify the nonzero 1427 structure. The size of this array is equal to the number 1428 of local rows, i.e `m`. 1429 1430 Output Parameter: 1431 . A - the matrix 1432 1433 Options Database Key: 1434 . -mat_sell_oneindex - Internally use indexing starting at 1 1435 rather than 0. When calling `MatSetValues()`, 1436 the user still MUST index entries starting at 0! 1437 1438 Example: 1439 Consider the following 8x8 matrix with 34 non-zero values, that is 1440 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1441 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1442 as follows 1443 1444 .vb 1445 1 2 0 | 0 3 0 | 0 4 1446 Proc0 0 5 6 | 7 0 0 | 8 0 1447 9 0 10 | 11 0 0 | 12 0 1448 ------------------------------------- 1449 13 0 14 | 15 16 17 | 0 0 1450 Proc1 0 18 0 | 19 20 21 | 0 0 1451 0 0 0 | 22 23 0 | 24 0 1452 ------------------------------------- 1453 Proc2 25 26 27 | 0 0 28 | 29 0 1454 30 0 0 | 31 32 33 | 0 34 1455 .ve 1456 1457 This can be represented as a collection of submatrices as 1458 .vb 1459 A B C 1460 D E F 1461 G H I 1462 .ve 1463 1464 Where the submatrices A,B,C are owned by proc0, D,E,F are 1465 owned by proc1, G,H,I are owned by proc2. 1466 1467 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1468 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1469 The 'M','N' parameters are 8,8, and have the same values on all procs. 1470 1471 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1472 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1473 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1474 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1475 part as `MATSEQSELL` matrices. For example, proc1 will store [E] as a `MATSEQSELL` 1476 matrix, and [DF] as another `MATSEQSELL` matrix. 1477 1478 When d_rlenmax, o_rlenmax parameters are specified, d_rlenmax storage elements are 1479 allocated for every row of the local DIAGONAL submatrix, and o_rlenmax 1480 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 1481 One way to choose `d_rlenmax` and `o_rlenmax` is to use the maximum number of nonzeros over 1482 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1483 In this case, the values of d_rlenmax,o_rlenmax are 1484 .vb 1485 proc0 - d_rlenmax = 2, o_rlenmax = 2 1486 proc1 - d_rlenmax = 3, o_rlenmax = 2 1487 proc2 - d_rlenmax = 1, o_rlenmax = 4 1488 .ve 1489 We are allocating m*(d_rlenmax+o_rlenmax) storage locations for every proc. This 1490 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1491 for proc3. i.e we are using 12+15+10=37 storage locations to store 1492 34 values. 1493 1494 When `d_rlen`, `o_rlen` parameters are specified, the storage is specified 1495 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1496 In the above case the values for `d_nnz`, `o_nnz` are 1497 .vb 1498 proc0 - d_nnz = [2,2,2] and o_nnz = [2,2,2] 1499 proc1 - d_nnz = [3,3,2] and o_nnz = [2,1,1] 1500 proc2 - d_nnz = [1,1] and o_nnz = [4,4] 1501 .ve 1502 Here the space allocated is still 37 though there are 34 nonzeros because 1503 the allocation is always done according to rlenmax. 1504 1505 Level: intermediate 1506 1507 Notes: 1508 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 1509 MatXXXXSetPreallocation() paradigm instead of this routine directly. 1510 [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`] 1511 1512 If the *_rlen parameter is given then the *_rlenmax parameter is ignored 1513 1514 `m`, `n`, `M`, `N` parameters specify the size of the matrix, and its partitioning across 1515 processors, while `d_rlenmax`, `d_rlen`, `o_rlenmax` , `o_rlen` parameters specify the approximate 1516 storage requirements for this matrix. 1517 1518 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 1519 processor than it must be used on all processors that share the object for 1520 that argument. 1521 1522 The user MUST specify either the local or global matrix dimensions 1523 (possibly both). 1524 1525 The parallel matrix is partitioned across processors such that the 1526 first m0 rows belong to process 0, the next m1 rows belong to 1527 process 1, the next m2 rows belong to process 2 etc.. where 1528 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 1529 values corresponding to [`m` x `N`] submatrix. 1530 1531 The columns are logically partitioned with the n0 columns belonging 1532 to 0th partition, the next n1 columns belonging to the next 1533 partition etc.. where n0,n1,n2... are the input parameter `n`. 1534 1535 The DIAGONAL portion of the local submatrix on any given processor 1536 is the submatrix corresponding to the rows and columns `m`, `n` 1537 corresponding to the given processor. i.e diagonal matrix on 1538 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 1539 etc. The remaining portion of the local submatrix [m x (N-n)] 1540 constitute the OFF-DIAGONAL portion. The example below better 1541 illustrates this concept. 1542 1543 For a square global matrix we define each processor's diagonal portion 1544 to be its local rows and the corresponding columns (a square submatrix); 1545 each processor's off-diagonal portion encompasses the remainder of the 1546 local matrix (a rectangular submatrix). 1547 1548 If `o_rlen`, `d_rlen` are specified, then `o_rlenmax`, and `d_rlenmax` are ignored. 1549 1550 When calling this routine with a single process communicator, a matrix of 1551 type `MATSEQSELL` is returned. If a matrix of type `MATMPISELL` is desired for this 1552 type of communicator, use the construction mechanism 1553 .vb 1554 MatCreate(...,&A); 1555 MatSetType(A,MATMPISELL); 1556 MatSetSizes(A, m,n,M,N); 1557 MatMPISELLSetPreallocation(A,...); 1558 .ve 1559 1560 .seealso: `Mat`, `MATSELL`, `MatCreate()`, `MatCreateSeqSELL()`, `MatSetValues()`, `MatMPISELLSetPreallocation()`, `MATMPISELL` 1561 @*/ 1562 PetscErrorCode MatCreateSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[], Mat *A) 1563 { 1564 PetscMPIInt size; 1565 1566 PetscFunctionBegin; 1567 PetscCall(MatCreate(comm, A)); 1568 PetscCall(MatSetSizes(*A, m, n, M, N)); 1569 PetscCallMPI(MPI_Comm_size(comm, &size)); 1570 if (size > 1) { 1571 PetscCall(MatSetType(*A, MATMPISELL)); 1572 PetscCall(MatMPISELLSetPreallocation(*A, d_rlenmax, d_rlen, o_rlenmax, o_rlen)); 1573 } else { 1574 PetscCall(MatSetType(*A, MATSEQSELL)); 1575 PetscCall(MatSeqSELLSetPreallocation(*A, d_rlenmax, d_rlen)); 1576 } 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 /*@C 1581 MatMPISELLGetSeqSELL - Returns the local pieces of this distributed matrix 1582 1583 Not Collective 1584 1585 Input Parameter: 1586 . A - the `MATMPISELL` matrix 1587 1588 Output Parameters: 1589 + Ad - The diagonal portion of `A` 1590 . Ao - The off-diagonal portion of `A` 1591 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 1592 1593 Level: advanced 1594 1595 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL` 1596 @*/ 1597 PetscErrorCode MatMPISELLGetSeqSELL(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 1598 { 1599 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 1600 PetscBool flg; 1601 1602 PetscFunctionBegin; 1603 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &flg)); 1604 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPISELL matrix as input"); 1605 if (Ad) *Ad = a->A; 1606 if (Ao) *Ao = a->B; 1607 if (colmap) *colmap = a->garray; 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 } 1610 1611 /*@C 1612 MatMPISELLGetLocalMatCondensed - Creates a `MATSEQSELL` matrix from an `MATMPISELL` matrix by 1613 taking all its local rows and NON-ZERO columns 1614 1615 Not Collective 1616 1617 Input Parameters: 1618 + A - the matrix 1619 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 1620 . row - index sets of rows to extract (or `NULL`) 1621 - col - index sets of columns to extract (or `NULL`) 1622 1623 Output Parameter: 1624 . A_loc - the local sequential matrix generated 1625 1626 Level: advanced 1627 1628 .seealso: `Mat`, `MATSEQSELL`, `MATMPISELL`, `MatGetOwnershipRange()`, `MatMPISELLGetLocalMat()` 1629 @*/ 1630 PetscErrorCode MatMPISELLGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 1631 { 1632 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 1633 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 1634 IS isrowa, iscola; 1635 Mat *aloc; 1636 PetscBool match; 1637 1638 PetscFunctionBegin; 1639 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPISELL, &match)); 1640 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPISELL matrix as input"); 1641 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 1642 if (!row) { 1643 start = A->rmap->rstart; 1644 end = A->rmap->rend; 1645 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 1646 } else { 1647 isrowa = *row; 1648 } 1649 if (!col) { 1650 start = A->cmap->rstart; 1651 cmap = a->garray; 1652 nzA = a->A->cmap->n; 1653 nzB = a->B->cmap->n; 1654 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 1655 ncols = 0; 1656 for (i = 0; i < nzB; i++) { 1657 if (cmap[i] < start) idx[ncols++] = cmap[i]; 1658 else break; 1659 } 1660 imark = i; 1661 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 1662 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 1663 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 1664 } else { 1665 iscola = *col; 1666 } 1667 if (scall != MAT_INITIAL_MATRIX) { 1668 PetscCall(PetscMalloc1(1, &aloc)); 1669 aloc[0] = *A_loc; 1670 } 1671 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 1672 *A_loc = aloc[0]; 1673 PetscCall(PetscFree(aloc)); 1674 if (!row) PetscCall(ISDestroy(&isrowa)); 1675 if (!col) PetscCall(ISDestroy(&iscola)); 1676 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 1677 PetscFunctionReturn(PETSC_SUCCESS); 1678 } 1679 1680 #include <../src/mat/impls/aij/mpi/mpiaij.h> 1681 1682 PetscErrorCode MatConvert_MPISELL_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1683 { 1684 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 1685 Mat B; 1686 Mat_MPIAIJ *b; 1687 1688 PetscFunctionBegin; 1689 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 1690 1691 if (reuse == MAT_REUSE_MATRIX) { 1692 B = *newmat; 1693 } else { 1694 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1695 PetscCall(MatSetType(B, MATMPIAIJ)); 1696 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1697 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 1698 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 1699 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 1700 } 1701 b = (Mat_MPIAIJ *)B->data; 1702 1703 if (reuse == MAT_REUSE_MATRIX) { 1704 PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 1705 PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 1706 } else { 1707 PetscCall(MatDestroy(&b->A)); 1708 PetscCall(MatDestroy(&b->B)); 1709 PetscCall(MatDisAssemble_MPISELL(A)); 1710 PetscCall(MatConvert_SeqSELL_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 1711 PetscCall(MatConvert_SeqSELL_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 1712 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1713 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1714 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1715 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1716 } 1717 1718 if (reuse == MAT_INPLACE_MATRIX) { 1719 PetscCall(MatHeaderReplace(A, &B)); 1720 } else { 1721 *newmat = B; 1722 } 1723 PetscFunctionReturn(PETSC_SUCCESS); 1724 } 1725 1726 PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 1727 { 1728 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1729 Mat B; 1730 Mat_MPISELL *b; 1731 1732 PetscFunctionBegin; 1733 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 1734 1735 if (reuse == MAT_REUSE_MATRIX) { 1736 B = *newmat; 1737 } else { 1738 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)a->A->data, *Ba = (Mat_SeqAIJ *)a->B->data; 1739 PetscInt i, d_nz = 0, o_nz = 0, m = A->rmap->N, n = A->cmap->N, lm = A->rmap->n, ln = A->cmap->n; 1740 PetscInt *d_nnz, *o_nnz; 1741 PetscCall(PetscMalloc2(lm, &d_nnz, lm, &o_nnz)); 1742 for (i = 0; i < lm; i++) { 1743 d_nnz[i] = Aa->i[i + 1] - Aa->i[i]; 1744 o_nnz[i] = Ba->i[i + 1] - Ba->i[i]; 1745 if (d_nnz[i] > d_nz) d_nz = d_nnz[i]; 1746 if (o_nnz[i] > o_nz) o_nz = o_nnz[i]; 1747 } 1748 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1749 PetscCall(MatSetType(B, MATMPISELL)); 1750 PetscCall(MatSetSizes(B, lm, ln, m, n)); 1751 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 1752 PetscCall(MatSeqSELLSetPreallocation(B, d_nz, d_nnz)); 1753 PetscCall(MatMPISELLSetPreallocation(B, d_nz, d_nnz, o_nz, o_nnz)); 1754 PetscCall(PetscFree2(d_nnz, o_nnz)); 1755 } 1756 b = (Mat_MPISELL *)B->data; 1757 1758 if (reuse == MAT_REUSE_MATRIX) { 1759 PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_REUSE_MATRIX, &b->A)); 1760 PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_REUSE_MATRIX, &b->B)); 1761 } else { 1762 PetscCall(MatDestroy(&b->A)); 1763 PetscCall(MatDestroy(&b->B)); 1764 PetscCall(MatConvert_SeqAIJ_SeqSELL(a->A, MATSEQSELL, MAT_INITIAL_MATRIX, &b->A)); 1765 PetscCall(MatConvert_SeqAIJ_SeqSELL(a->B, MATSEQSELL, MAT_INITIAL_MATRIX, &b->B)); 1766 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1767 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1768 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1769 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1770 } 1771 1772 if (reuse == MAT_INPLACE_MATRIX) { 1773 PetscCall(MatHeaderReplace(A, &B)); 1774 } else { 1775 *newmat = B; 1776 } 1777 PetscFunctionReturn(PETSC_SUCCESS); 1778 } 1779 1780 PetscErrorCode MatSOR_MPISELL(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1781 { 1782 Mat_MPISELL *mat = (Mat_MPISELL *)matin->data; 1783 Vec bb1 = NULL; 1784 1785 PetscFunctionBegin; 1786 if (flag == SOR_APPLY_UPPER) { 1787 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1788 PetscFunctionReturn(PETSC_SUCCESS); 1789 } 1790 1791 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1792 1793 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1794 if (flag & SOR_ZERO_INITIAL_GUESS) { 1795 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1796 its--; 1797 } 1798 1799 while (its--) { 1800 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1801 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1802 1803 /* update rhs: bb1 = bb - B*x */ 1804 PetscCall(VecScale(mat->lvec, -1.0)); 1805 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1806 1807 /* local sweep */ 1808 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1809 } 1810 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1811 if (flag & SOR_ZERO_INITIAL_GUESS) { 1812 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1813 its--; 1814 } 1815 while (its--) { 1816 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1817 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1818 1819 /* update rhs: bb1 = bb - B*x */ 1820 PetscCall(VecScale(mat->lvec, -1.0)); 1821 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1822 1823 /* local sweep */ 1824 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1825 } 1826 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1827 if (flag & SOR_ZERO_INITIAL_GUESS) { 1828 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1829 its--; 1830 } 1831 while (its--) { 1832 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1833 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1834 1835 /* update rhs: bb1 = bb - B*x */ 1836 PetscCall(VecScale(mat->lvec, -1.0)); 1837 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1838 1839 /* local sweep */ 1840 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1841 } 1842 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1843 1844 PetscCall(VecDestroy(&bb1)); 1845 1846 matin->factorerrortype = mat->A->factorerrortype; 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 #if defined(PETSC_HAVE_CUDA) 1851 PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat, MatType, MatReuse, Mat *); 1852 #endif 1853 1854 /*MC 1855 MATMPISELL - MATMPISELL = "MPISELL" - A matrix type to be used for parallel sparse matrices. 1856 1857 Options Database Keys: 1858 . -mat_type mpisell - sets the matrix type to `MATMPISELL` during a call to `MatSetFromOptions()` 1859 1860 Level: beginner 1861 1862 .seealso: `Mat`, `MATSELL`, `MATSEQSELL` `MatCreateSELL()` 1863 M*/ 1864 PETSC_EXTERN PetscErrorCode MatCreate_MPISELL(Mat B) 1865 { 1866 Mat_MPISELL *b; 1867 PetscMPIInt size; 1868 1869 PetscFunctionBegin; 1870 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1871 PetscCall(PetscNew(&b)); 1872 B->data = (void *)b; 1873 B->ops[0] = MatOps_Values; 1874 B->assembled = PETSC_FALSE; 1875 B->insertmode = NOT_SET_VALUES; 1876 b->size = size; 1877 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 1878 /* build cache for off array entries formed */ 1879 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 1880 1881 b->donotstash = PETSC_FALSE; 1882 b->colmap = NULL; 1883 b->garray = NULL; 1884 b->roworiented = PETSC_TRUE; 1885 1886 /* stuff used for matrix vector multiply */ 1887 b->lvec = NULL; 1888 b->Mvctx = NULL; 1889 1890 /* stuff for MatGetRow() */ 1891 b->rowindices = NULL; 1892 b->rowvalues = NULL; 1893 b->getrowactive = PETSC_FALSE; 1894 1895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISELL)); 1896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISELL)); 1897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPISELL)); 1898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELL)); 1899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpiaij_C", MatConvert_MPISELL_MPIAIJ)); 1900 #if defined(PETSC_HAVE_CUDA) 1901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisell_mpisellcuda_C", MatConvert_MPISELL_MPISELLCUDA)); 1902 #endif 1903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPISELL)); 1904 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISELL)); 1905 PetscFunctionReturn(PETSC_SUCCESS); 1906 } 1907